K-means Clustering Implementation in Cocoa, Objective C
Created By: Debasis Das
(Definition of KMean Clustering: Sourced from Wiki) k-means clustering is a method of vector quantization, originally from signal processing, that is popular for cluster analysis in data mining. k-means clustering aims to partition n observations into k clusters in which each observation belongs to the cluster with the nearest mean, serving as a prototype of the cluster. This results in a partitioning of the data space into Voronoi cells.
Predominantly Euclidean distance is used as a metric and variance is used as a measure of cluster scatter.
However in this sample app we will use a different logic to partition.
We are introducing a concept of magnetic force that would pull the points towards there cluster centroids making the clusters appear more distinctive.
This sample was created by me for understanding k-mean clustering and this sample code would not scale up beyond a couple of thousand points.
For production implementation use standard libraries that has been throughly researched.
Output Of the sample application/code is as follows
The above figure displays the 4 stages of visualization.
- Stage 1: Points distributed in a X/Y Plane
- Stage 2: Adding 4 Centroids to the X/Y Plane. Highlighted as Red Dots
- Stage 3: Each point in the X/Y Plane is evaluated to find the nearest centroid to the point and a line is drawn from the point to the nearest centroid. (This is the place where Euclidean distance logic is used by most clustering algorithm.
- Stage 4: Apply magnetic force to pull the points nearer to the Centroid it belongs to.
In addition to the above steps, once the clusters are formed, the centroid / cluster id must be updated for each point to query records based on cluster ids.
Fig 1: Stage 1: Points distributed in a X/Y Plane
Fig 2: Stage 2: Adding 4 Centroids to the X/Y Plane. Highlighted as Red Dots
Fig 3: Stage 3: Each point in the X/Y Plane is evaluated to find the nearest centroid to the point and a line is drawn from the point to the nearest centroid. (This is the place where Euclidean distance logic is used by most clustering algorithm.
Fig 4: Stage 4: Apply magnetic force to pull the points nearer to the Centroid it belongs to.
Fig 5: Stage 4: Applying an even stronger magnetic force
// // KSAppDelegate.h // CocoaClusters // // Created by Debasis Das on 7/29/14. // Copyright (c) 2014 Knowstack. All rights reserved. // #import <Cocoa/Cocoa.h> @interface KSAppDelegate : NSObject @property (assign) IBOutlet NSWindow *window; @property (nonatomic, retain) NSMutableArray *dataArray; @property (nonatomic, retain) NSMutableArray *centroidArray; @property (assign) IBOutlet NSView *placeholderView1; @property (assign) IBOutlet NSView *placeholderView2; @property (assign) IBOutlet NSView *placeholderView3; @property (assign) IBOutlet NSView *placeholderView4; //The four placeholders views will be used to demonstrate the 4 stages of this sample application +(float)distanceBetweenPointOne:(NSPoint)pointOne AndPointTwo:(NSPoint )pointTwo; @end
// // KSAppDelegate.m // CocoaClusters // // Created by Debasis Das on 7/29/14. // Copyright (c) 2014 Knowstack. All rights reserved. // #import "KSAppDelegate.h" #import "RandomDataView.h" //#import "CentroidDataView.h" @implementation KSAppDelegate @synthesize dataArray; - (void)applicationWillFinishLaunching:(NSNotification *)notification { [self setDataArray:[self getRandomData]]; [self setCentroidArray:[self getCentroidDataArray]]; } //Utility Method to create random data - (NSMutableArray *)getRandomData { NSMutableArray *pointArray = [[NSMutableArray alloc] init]; NSNumber *x; NSNumber *y; NSNumber *cx; NSNumber *cy; for (int i=0; i<3000; i++) { x =[NSNumber numberWithFloat:arc4random()%340]; y =[NSNumber numberWithFloat:arc4random()%350]; cx =[NSNumber numberWithFloat:0.0]; cy =[NSNumber numberWithFloat:0.0]; NSDictionary *dict = [NSMutableDictionary dictionaryWithObjectsAndKeys: x,@"x", y,@"y", cx,@"cx", cy,@"cy", nil]; [pointArray addObject:dict]; } return pointArray; } //Stage 1: Load Random Data in a X/Y Plane to demonstrate KMean Clustering -(IBAction)loadRandomData:(id)sender { RandomDataView *rdv = [[RandomDataView alloc] initWithFrame:[self.placeholderView1 bounds]]; [rdv setPointsArray:self.dataArray]; [self.placeholderView1 addSubview:rdv]; } -(NSMutableArray *)getCentroidDataArray { NSMutableArray *centroidPointsArray = [[NSMutableArray alloc] init]; float width = [self.placeholderView1 frame].size.width; float height = [self.placeholderView1 frame].size.height; NSDictionary *centroid1 = [NSDictionary dictionaryWithObjectsAndKeys: [NSNumber numberWithFloat:arc4random()%300],@"x", [NSNumber numberWithFloat:arc4random()%300],@"y", nil]; NSDictionary *centroid2 = [NSDictionary dictionaryWithObjectsAndKeys: [NSNumber numberWithFloat:arc4random()%300],@"x", [NSNumber numberWithFloat:arc4random()%300],@"y", nil]; NSDictionary *centroid3 = [NSDictionary dictionaryWithObjectsAndKeys: [NSNumber numberWithFloat:width/4],@"x", [NSNumber numberWithFloat:3*(height/4)],@"y", nil]; NSDictionary *centroid4 = [NSDictionary dictionaryWithObjectsAndKeys: [NSNumber numberWithFloat:(3* width/4)],@"x", [NSNumber numberWithFloat:(3* height/4)],@"y", nil]; [centroidPointsArray addObject:centroid1]; [centroidPointsArray addObject:centroid2]; [centroidPointsArray addObject:centroid3]; [centroidPointsArray addObject:centroid4]; return centroidPointsArray; } //The second stage is to create 4 random centroid points and add it to the X/Y Plane and highlight it in red color -(IBAction)addCentroids:(id)sender { CentroidDataView *rdv = [[CentroidDataView alloc] initWithFrame:[self.placeholderView2 bounds]]; [rdv setPointsArray:self.dataArray]; [rdv setCentroidsArray:[self centroidArray]]; [self.placeholderView2 addSubview:rdv]; } //Create lines from each point to the nearest centroids -(IBAction)addLinesToCentroids:(id)sender { CentroidWithLinesDataView *rdv = [[CentroidWithLinesDataView alloc] initWithFrame:[self.placeholderView3 bounds]]; [rdv setPointsArray:self.dataArray]; [rdv setCentroidsArray:[self centroidArray]]; [self.placeholderView3 addSubview:rdv]; }
//Compressing the cluster / attracting the points towards the centroid based on a g force along the same line
-(IBAction)compressCluster:(id)sender
{
float compressionFactor = [sender floatValue];
NSLog(@"compressionFactor %2.2f",compressionFactor);
NSMutableArray *dataArray = [self dataArray];
NSMutableArray *centroidArray = [self centroidArray];
for (NSDictionary *dict in dataArray)
{
float minimumDistance = 10000.0;
NSPoint nearestCentroid;
for (NSDictionary *centroidDict in centroidArray)
{
float newMinimumDistance = [KSAppDelegate distanceBetweenPointOne:NSMakePoint([[centroidDict objectForKey:@"x"] floatValue], [[centroidDict objectForKey:@"y"] floatValue]) AndPointTwo:NSMakePoint([[dict objectForKey:@"x"] floatValue], [[dict objectForKey:@"y"] floatValue])];
if (newMinimumDistance < minimumDistance)
{
minimumDistance = newMinimumDistance;
nearestCentroid = NSMakePoint([[centroidDict objectForKey:@"x"] floatValue], [[centroidDict objectForKey:@"y"] floatValue]);
}
}
[dict setValue:[NSNumber numberWithFloat:nearestCentroid.x] forKey:@"cx"];
[dict setValue:[NSNumber numberWithFloat:nearestCentroid.y] forKey:@"cy"];
}
for (NSMutableDictionary *mdict in dataArray)
{
float pointLengthFromOrigin = [KSAppDelegate distanceBetweenPointOne:
NSMakePoint([[mdict objectForKey:@"x"] floatValue], [[mdict objectForKey:@"y"] floatValue])
AndPointTwo:NSMakePoint(0, 0)];
float length = [KSAppDelegate distanceBetweenPointOne:
NSMakePoint([[mdict objectForKey:@"x"] floatValue], [[mdict objectForKey:@"y"] floatValue])
AndPointTwo:NSMakePoint([[mdict objectForKey:@"cx"] floatValue], [[mdict objectForKey:@"cy"] floatValue])];
float newLength = length * compressionFactor;
float angle;
float newX;
float newY;
if (([[mdict objectForKey:@"x"] floatValue] > [[mdict objectForKey:@"cx"] floatValue]) && ([[mdict objectForKey:@"y"] floatValue] > [[mdict objectForKey:@"cy"] floatValue]))
{
angle = atan2f(([[mdict objectForKey:@"y"] floatValue] - [[mdict objectForKey:@"cy"] floatValue]), ([[mdict objectForKey:@"x"] floatValue] - [[mdict objectForKey:@"cx"] floatValue]));
newX = [[mdict objectForKey:@"cx"] floatValue] + cosf(angle) * newLength;
newY = [[mdict objectForKey:@"cy"] floatValue] + sinf(angle) * newLength;
}
else if (([[mdict objectForKey:@"x"] floatValue] < [[mdict objectForKey:@"cx"] floatValue]) && ([[mdict objectForKey:@"y"] floatValue] < [[mdict objectForKey:@"cy"] floatValue]))
{
angle = atan2f(([[mdict objectForKey:@"cy"] floatValue] - [[mdict objectForKey:@"y"] floatValue]), ([[mdict objectForKey:@"cx"] floatValue] - [[mdict objectForKey:@"x"] floatValue]));
newX = [[mdict objectForKey:@"cx"] floatValue] - cosf(angle) * newLength;
newY = [[mdict objectForKey:@"cy"] floatValue] - sinf(angle) * newLength;
}
else if (([[mdict objectForKey:@"x"] floatValue] > [[mdict objectForKey:@"cx"] floatValue]) && ([[mdict objectForKey:@"y"] floatValue] < [[mdict objectForKey:@"cy"] floatValue]))
{
angle = atan2f(([[mdict objectForKey:@"cy"] floatValue] - [[mdict objectForKey:@"y"] floatValue]), ([[mdict objectForKey:@"x"] floatValue] - [[mdict objectForKey:@"cx"] floatValue]));
newX = [[mdict objectForKey:@"cx"] floatValue] + cosf(angle) * newLength;
newY = [[mdict objectForKey:@"cy"] floatValue] - sinf(angle) * newLength;
}
else if (([[mdict objectForKey:@"x"] floatValue] < [[mdict objectForKey:@"cx"] floatValue]) && ([[mdict objectForKey:@"y"] floatValue] > [[mdict objectForKey:@"cy"] floatValue]))
{
angle = atan2f(([[mdict objectForKey:@"y"] floatValue] - [[mdict objectForKey:@"cy"] floatValue]), ([[mdict objectForKey:@"cx"] floatValue] - [[mdict objectForKey:@"x"] floatValue]));
newX = [[mdict objectForKey:@"cx"] floatValue] - cosf(angle) * newLength;
newY = [[mdict objectForKey:@"cy"] floatValue] + sinf(angle) * newLength;
}
[mdict setValue:[NSNumber numberWithFloat:newX] forKey:@"x"];
[mdict setValue:[NSNumber numberWithFloat:newY] forKey:@"y"];
}
CompressedClusterView *rdv = [[CompressedClusterView alloc] initWithFrame:[self.placeholderView4 bounds]];
[rdv setPointsArray:self.dataArray];
[rdv setCentroidsArray:[self centroidArray]];
[self.placeholderView4 addSubview:rdv];
}
+(float)distanceBetweenPointOne:(NSPoint)pointOne AndPointTwo:(NSPoint )pointTwo { float distance = sqrtf(pow((pointOne.x - pointTwo.x),2)+pow((pointOne.y - pointTwo.y),2)); return distance; } @end
//
// RandomDataView.h
// CocoaClusters
//
// Created by Debasis Das on 7/29/14.
// Copyright (c) 2014 Knowstack. All rights reserved.
//
#import <Cocoa/Cocoa.h>
@interface RandomDataView : NSView
{
}
@property (nonatomic,retain) NSMutableArray *pointsArray;
@end
@interface CentroidDataView : NSView
{
}
@property (nonatomic,retain) NSMutableArray *pointsArray;
@property (nonatomic,retain) NSMutableArray *centroidsArray;
@end
@interface CentroidWithLinesDataView : NSView
{
}
@property (nonatomic,retain) NSMutableArray *pointsArray;
@property (nonatomic,retain) NSMutableArray *centroidsArray;
@end
@interface CompressedClusterView : NSView
{
}
@property (nonatomic,retain) NSMutableArray *pointsArray;
@property (nonatomic,retain) NSMutableArray *centroidsArray;
@end
// // RandomDataView.m // CocoaClusters // // Created by Debasis Das on 7/29/14. // Copyright (c) 2014 Knowstack. All rights reserved. // #import "RandomDataView.h" #import "KSAppDelegate.h" //Stage 1 KMean Clustering Random Data - Visualization in a X/Y Plane @implementation RandomDataView - (void)drawRect:(NSRect)dirtyRect { [super drawRect:dirtyRect]; NSGraphicsContext* gc = [NSGraphicsContext currentContext]; [gc saveGraphicsState]; for (NSDictionary *dict in self.pointsArray) { [[NSColor blackColor] setStroke]; [[NSColor yellowColor] setFill]; NSRect rect = NSMakeRect([[dict objectForKey:@"x"] intValue], [[dict objectForKey:@"y"] intValue], 4, 4); NSBezierPath* circlePath = [NSBezierPath bezierPath]; [circlePath appendBezierPathWithOvalInRect: rect]; [circlePath stroke]; [circlePath fill]; } [gc restoreGraphicsState]; } @end //Stage 2: Adding Centroids to the Random Data View @implementation CentroidDataView - (void)drawRect:(NSRect)dirtyRect { [super drawRect:dirtyRect]; NSGraphicsContext* gc = [NSGraphicsContext currentContext]; [gc saveGraphicsState]; for (NSDictionary *dict in self.pointsArray) { [[NSColor blackColor] setStroke]; [[NSColor yellowColor] setFill]; NSRect rect = NSMakeRect([[dict objectForKey:@"x"] intValue], [[dict objectForKey:@"y"] intValue], 4, 4); NSBezierPath* circlePath = [NSBezierPath bezierPath]; [circlePath appendBezierPathWithOvalInRect: rect]; [circlePath stroke]; [circlePath fill]; } [gc restoreGraphicsState]; //Draw the Centroid Points gc = [NSGraphicsContext currentContext]; [gc saveGraphicsState]; for (NSDictionary *dict in self.centroidsArray) { [[NSColor blackColor] setStroke]; [[NSColor redColor] setFill]; NSRect rect = NSMakeRect([[dict objectForKey:@"x"] intValue], [[dict objectForKey:@"y"] intValue], 20, 20); NSBezierPath* circlePath = [NSBezierPath bezierPath]; [circlePath appendBezierPathWithOvalInRect: rect]; [circlePath stroke]; [circlePath fill]; } [gc restoreGraphicsState]; } @end // Stage 3: Drawing Lines from individual points to the nearest centroids. // This does not scale up beyond a couple of thousand points. // It is wiser to use Euclidean logic @implementation CentroidWithLinesDataView - (void)drawRect:(NSRect)dirtyRect { [super drawRect:dirtyRect]; NSGraphicsContext* gc = [NSGraphicsContext currentContext]; [gc saveGraphicsState]; for (NSDictionary *dict in self.pointsArray) { [[NSColor blackColor] setStroke]; [[NSColor greenColor] setFill]; NSRect rect = NSMakeRect([[dict objectForKey:@"x"] intValue], [[dict objectForKey:@"y"] intValue], 4, 4); NSBezierPath* circlePath = [NSBezierPath bezierPath]; [circlePath appendBezierPathWithOvalInRect: rect]; [circlePath stroke]; [circlePath fill]; } [gc restoreGraphicsState]; //Draw the Centroid Points gc = [NSGraphicsContext currentContext]; [gc saveGraphicsState]; for (NSDictionary *dict in self.centroidsArray) { [[NSColor blackColor] setStroke]; [[NSColor redColor] setFill]; NSRect rect = NSMakeRect([[dict objectForKey:@"x"] intValue], [[dict objectForKey:@"y"] intValue], 20, 20); NSBezierPath* circlePath = [NSBezierPath bezierPath]; [circlePath appendBezierPathWithOvalInRect: rect]; [circlePath stroke]; [circlePath fill]; } [gc restoreGraphicsState]; //Draw the lines to centroid based on proximity for (NSDictionary *dict in self.pointsArray) { float minimumDistance = 10000.0; NSPoint nearestCentroid; for (NSDictionary *centroidDict in self.centroidsArray) { float newMinimumDistance = [KSAppDelegate distanceBetweenPointOne:NSMakePoint([[centroidDict objectForKey:@"x"] floatValue], [[centroidDict objectForKey:@"y"] floatValue]) AndPointTwo:NSMakePoint([[dict objectForKey:@"x"] floatValue], [[dict objectForKey:@"y"] floatValue])]; if (newMinimumDistance < minimumDistance) { minimumDistance = newMinimumDistance; nearestCentroid = NSMakePoint([[centroidDict objectForKey:@"x"] floatValue], [[centroidDict objectForKey:@"y"] floatValue]); } } NSBezierPath *bPath = [NSBezierPath bezierPath]; [bPath moveToPoint:NSMakePoint([[dict objectForKey:@"x"] floatValue], [[dict objectForKey:@"y"] floatValue])]; [bPath lineToPoint:nearestCentroid]; [bPath setLineWidth:0.50]; [bPath stroke]; // NSLog(@"%2.2f",minimumDistance); NSLog(@"%@",NSStringFromPoint(nearestCentroid)); } } @end //Stage 4: Compressed Cluster Visualization @implementation CompressedClusterView - (void)drawRect:(NSRect)dirtyRect { [super drawRect:dirtyRect]; NSGraphicsContext* gc = [NSGraphicsContext currentContext]; [gc saveGraphicsState]; for (NSDictionary *dict in self.pointsArray) { [[NSColor blackColor] setStroke]; [[NSColor greenColor] setFill]; NSRect rect = NSMakeRect([[dict objectForKey:@"x"] intValue], [[dict objectForKey:@"y"] intValue], 4, 4); NSBezierPath* circlePath = [NSBezierPath bezierPath]; [circlePath appendBezierPathWithOvalInRect: rect]; [circlePath stroke]; [circlePath fill]; } [gc restoreGraphicsState]; //Draw the Centroid Points gc = [NSGraphicsContext currentContext]; [gc saveGraphicsState]; for (NSDictionary *dict in self.centroidsArray) { [[NSColor blackColor] setStroke]; [[NSColor redColor] setFill]; NSRect rect = NSMakeRect([[dict objectForKey:@"x"] intValue], [[dict objectForKey:@"y"] intValue], 20, 20); NSBezierPath* circlePath = [NSBezierPath bezierPath]; [circlePath appendBezierPathWithOvalInRect: rect]; [circlePath stroke]; [circlePath fill]; } [gc restoreGraphicsState]; } @end
Leave a Reply