关于对 DBSCAN 算法的学习推荐结合维基百科和百度百科,基本就可以看懂了。
/*
input: data.txt(x, y of points)
1.1 1.1
1.1 2.1
1.1 3.1
1.1 4.1
9 1
8 8
9 8
1.1 5.1
2.1 1.1
2.1 2.1
2.1 3.1
2.1 4.1
2.1 5.1
2.1 6.1
8 9
10 10
2.1 7.1
output: out.txt(x, y, clusterID)
9.000000 1.000000 5
10.000000 10.000000 16
2.100000 7.100000 1
1.100000 1.100000 1
1.100000 2.100000 1
1.100000 3.100000 1
1.100000 4.100000 1
8.000000 8.000000 6
9.000000 8.000000 6
1.100000 5.100000 1
2.100000 1.100000 1
2.100000 2.100000 1
2.100000 3.100000 1
2.100000 4.100000 1
2.100000 5.100000 1
2.100000 6.100000 1
8.000000 9.000000 6
*/
#include <iostream>
#include <cstdio>
#include <vector>
#include <cmath>
#include <queue>
#include <string>
#include <cstring>
using namespace std;
const double eps = 1.5;
const int MinPts = 2;
class point{
public:
double x, y;
int clusterID = 0;
int type = 1; //1:noise 2:border 3:core
int pts = 0; //number of points in eps of the point
vector<int> corepts; //index of points in eps of the point
int vis = 0;
point(double a, double b, int c){
x = a;
y = b;
clusterID = c;
}
};
//distance
double getDis(point a, point b){
return sqrt((a.x-b.x)*(a.x-b.x) + (a.y-b.y)*(a.y-b.y));
}
vector<point> openFile(){
FILE *fp1 = freopen("data.txt", "r", stdin);
vector<point> data;
int i = 1;
double x, y;
while(~fscanf(fp1, "%lf", &x)){
fscanf(fp1, "%lf", &y);
point p(x, y, i++);
data.push_back(p);
}
fclose(fp1);
return data;
}
void DBSCAN(vector<point> data){
int len = data.size();
for(int i = 0; i < len; i++){
for(int j = i+1; j < len; j++){
if(getDis(data[i], data[j]) < eps){
data[i].pts++;
data[j].pts++;
}
}
}
vector<point> corePoint;
for(int i = 0; i < len; i++){
if(data[i].pts >= MinPts){
data[i].type = 3;
corePoint.push_back(data[i]);
}
}
for(int i = 0; i < corePoint.size(); i++){
for(int j = i+1; j < corePoint.size(); j++){
if(getDis(corePoint[i], corePoint[j]) < eps){
corePoint[i].corepts.push_back(j);
corePoint[j].corepts.push_back(i);
}
}
}
//bfs: update the cluterID of points
for(int i = 0; i < corePoint.size(); i++){
if(!corePoint[i].vis){
queue<point> q;
q.push(corePoint[i]);
corePoint[i].vis = 1;
while(!q.empty()){
point curP = q.front();
q.pop();
for(int j = 0; j < curP.corepts.size(); j++){
if(corePoint[curP.corepts[j]].vis) continue;
corePoint[curP.corepts[j]].clusterID = curP.clusterID;
q.push(corePoint[curP.corepts[j]]);
corePoint[curP.corepts[j]].vis = 1;
}
}
}
}
//border point
for(int i = 0; i < len; i++){
if(data[i].type == 3) continue;
for(int j = 0; j < corePoint.size(); j++){
if(getDis(data[i], corePoint[j]) < eps){
data[i].type = 2;
data[i].clusterID = corePoint[j].clusterID;
break;
}
}
}
//output
FILE *fp2 = freopen("out.txt", "w", stdout);
for(int i = 0; i < len; i++){
if(data[i].type != 3)
fprintf(fp2, "%f %f %d\n", data[i].x, data[i].y, data[i].clusterID);
}
for(int i = 0; i < corePoint.size(); i++){
fprintf(fp2, "%f %f %d\n", corePoint[i].x, corePoint[i].y, corePoint[i].clusterID);
}
fclose(fp2);
}
int main(){
vector<point> data = openFile();
DBSCAN(data);
return 0;
}