C++实现DBSCAN算法

April 06, 2016

关于对 DBSCAN 算法的学习推荐结合维基百科和百度百科,基本就可以看懂了。

/*
input: data.txt(x, y of points)

1.1 1.1
1.1 2.1
1.1 3.1
1.1 4.1
9 1
8 8
9 8
1.1 5.1
2.1 1.1
2.1 2.1
2.1 3.1
2.1 4.1
2.1 5.1
2.1 6.1
8 9
10 10
2.1 7.1

output: out.txt(x, y, clusterID)

9.000000 1.000000 5
10.000000 10.000000 16
2.100000 7.100000 1
1.100000 1.100000 1
1.100000 2.100000 1
1.100000 3.100000 1
1.100000 4.100000 1
8.000000 8.000000 6
9.000000 8.000000 6
1.100000 5.100000 1
2.100000 1.100000 1
2.100000 2.100000 1
2.100000 3.100000 1
2.100000 4.100000 1
2.100000 5.100000 1
2.100000 6.100000 1
8.000000 9.000000 6

*/


#include <iostream>
#include <cstdio>
#include <vector>
#include <cmath>
#include <queue>
#include <string>
#include <cstring>
using namespace std;

const double eps = 1.5;
const int MinPts = 2;

class point{
public:
    double x, y;
    int clusterID = 0;
    int type = 1;  //1:noise 2:border 3:core
    int pts = 0;   //number of points in eps of the point
    vector<int> corepts; //index of points in eps of the point
    int vis = 0;

    point(double a, double b, int c){
        x = a;
        y = b;
        clusterID = c;
    }
};

//distance
double getDis(point a, point b){
    return sqrt((a.x-b.x)*(a.x-b.x) + (a.y-b.y)*(a.y-b.y));
}

vector<point> openFile(){
    FILE *fp1 = freopen("data.txt", "r", stdin);
    vector<point> data;
    int i = 1;
    double x, y;
    while(~fscanf(fp1, "%lf", &x)){
        fscanf(fp1, "%lf", &y);
        point p(x, y, i++);
        data.push_back(p);
    }
    fclose(fp1);
    return data;
}

void DBSCAN(vector<point> data){
    int len = data.size();
    for(int i = 0; i < len; i++){
        for(int j = i+1; j < len; j++){
            if(getDis(data[i], data[j]) < eps){
                data[i].pts++;
                data[j].pts++;
            }
        }
    }
    vector<point> corePoint;
    for(int i = 0; i < len; i++){
        if(data[i].pts >= MinPts){
            data[i].type = 3;
            corePoint.push_back(data[i]);
        }
    }

    for(int i = 0; i < corePoint.size(); i++){
        for(int j = i+1; j < corePoint.size(); j++){
            if(getDis(corePoint[i], corePoint[j]) < eps){
                corePoint[i].corepts.push_back(j);
                corePoint[j].corepts.push_back(i);
            }
        }
    }

    //bfs: update the cluterID of points
    for(int i = 0; i < corePoint.size(); i++){
        if(!corePoint[i].vis){
            queue<point> q;
            q.push(corePoint[i]);
            corePoint[i].vis = 1;
            while(!q.empty()){
                point curP = q.front();
                q.pop();
                for(int j = 0; j < curP.corepts.size(); j++){
                    if(corePoint[curP.corepts[j]].vis) continue;
                    corePoint[curP.corepts[j]].clusterID = curP.clusterID;

                    q.push(corePoint[curP.corepts[j]]);
                    corePoint[curP.corepts[j]].vis = 1;
                }
            }
        }
    }

    //border point
    for(int i = 0; i < len; i++){
        if(data[i].type == 3) continue;
        for(int j = 0; j < corePoint.size(); j++){
            if(getDis(data[i], corePoint[j]) < eps){
                data[i].type = 2;
                data[i].clusterID = corePoint[j].clusterID;
                break;
            }
        }
    }


    //output
    FILE *fp2 = freopen("out.txt", "w", stdout);

    for(int i = 0; i < len; i++){
        if(data[i].type != 3)
            fprintf(fp2, "%f %f %d\n", data[i].x, data[i].y, data[i].clusterID);
    }
    for(int i = 0; i < corePoint.size(); i++){
        fprintf(fp2, "%f %f %d\n", corePoint[i].x, corePoint[i].y, corePoint[i].clusterID);
    }
    fclose(fp2);
}

int main(){
    vector<point> data = openFile();
    DBSCAN(data);
    return 0;
}

Profile picture

Written by Armin Li , a venture capitalist. [Weibo] [Subscribe]