Q：找出IP属于哪个子网的最有效的方法是什么？

Suppose I have billions of records that contain IPv4 field each. And I want to find out:

1. If each record belongs to one of the subnet I concerns
2. Which subnet it belongs to if it satisfies requirement 1.

Each concerned subnet is defined as several masks(61.232.85.0/25, 61.232.86.0/27) and maybe some discrete IPs, and I have a total number of 1000 subnet definitions to deal with.

So, What's the most efficient algorithm and/or data structure to handle the job?

Also, I've find Trie a possible solution, any suggestions?

1. If each record belongs to one of the subnet I concerns
2. Which subnet it belongs to if it satisfies requirement 1.

You could represent the subnets as int values, put them all in an array, sort it and then use a binary search on that. Although longs might be better since then you don't have to deal with negative numbers in the binary search. That would take a maximum of 10 steps to find a subnet. A trie might take e.g. 25 steps for a /25 subnet. Also remember that 1000 longs is only 8 kb. That will easily fit into the CPU cache, making it really fast. And of course you could then use a second array that stores which subnet each of the masks belongs to.

Here is an example in Scala `findMaskIdx` finds the index of a given mask (the ip part of the subnet definition) using a binary search. If it can't find anything it returns the index of the first mask that's larger than the one it searched for. `findIpIdx` takes an ip address and returns the index of the subnet definition it belongs to or -1 if nothing is found.

`findIpIdx` can be run around 100 to 200 million times per second. So it seems to be quite fast. There is only one problem with this approach. If two subnets of different size overlap the code might find the wrong one. But I hope that shouldn't be too difficult to fix.

``````def ipStringToInt(s: String): Int = {
var ip = 0
for(num <- s.split("\\.")) {
ip = ip * 256 + num.toInt
}
ip
}

def parseSubnet(s: String): (Long, Int) = {
}

val subnetGroups = Vector(
Vector("61.232.85.0/25", "61.232.86.0/27"),
Vector("123.234.12.24/16", "1.2.3.4"),
Vector("61.232.87.5", "253.2.0.0/16")
)

val subnetData = (for {
(group, idx) <- subnetGroups.zipWithIndex

val groupNr: Array[Int] = subnetData.map(_._3).toArray

def findMaskIdx(ip: Long): Int = {
var low = 0
while(high > low) {
val mid = (low + high)/2
if(masks(mid) > ip) high = mid
else if(masks(mid) < ip) low = mid + 1
else return mid
}
low
}

def findIpIdx(ip: Int): Int = {
val ipLong = ip & 0xFFFFFFFFL
idx -= 1
if(idx < 0) return -1
val m = (0xFFFFFFFF00000000L >>> maskLengths(idx)) & 0xFFFFFFFFL
if((m & masks(idx)) == (m & ipLong)) return idx
return -1
}

println(subnetData.map {
})
println()

println()

def testIP(ipString: String) {
println("ipString = " + ipString)
val ip = ipStringToInt(ipString)
val dataIdx = findIpIdx(ip)
println("dataIdx = " + dataIdx)
if(dataIdx >= 0) {
val data = subnetData(dataIdx)
println("data = " + (subnetData(dataIdx) match {
}))
}
println()
}

testIP("61.232.86.12")
testIP("253.2.100.253")
testIP("253.3.0.0")
``````

You could represent the subnets as int values, put them all in an array, sort it and then use a binary search on that. Although longs might be better since then you don't have to deal with negative numbers in the binary search. That would take a maximum of 10 steps to find a subnet. A trie might take e.g. 25 steps for a /25 subnet. Also remember that 1000 longs is only 8 kb. That will easily fit into the CPU cache, making it really fast. And of course you could then use a second array that stores which subnet each of the masks belongs to.

Here is an example in Scala `findMaskIdx` finds the index of a given mask (the ip part of the subnet definition) using a binary search. If it can't find anything it returns the index of the first mask that's larger than the one it searched for. `findIpIdx` takes an ip address and returns the index of the subnet definition it belongs to or -1 if nothing is found.

`findIpIdx` can be run around 100 to 200 million times per second. So it seems to be quite fast. There is only one problem with this approach. If two subnets of different size overlap the code might find the wrong one. But I hope that shouldn't be too difficult to fix.

``````def ipStringToInt(s: String): Int = {
var ip = 0
for(num <- s.split("\\.")) {
ip = ip * 256 + num.toInt
}
ip
}

def parseSubnet(s: String): (Long, Int) = {
}

val subnetGroups = Vector(
Vector("61.232.85.0/25", "61.232.86.0/27"),
Vector("123.234.12.24/16", "1.2.3.4"),
Vector("61.232.87.5", "253.2.0.0/16")
)

val subnetData = (for {
(group, idx) <- subnetGroups.zipWithIndex

val groupNr: Array[Int] = subnetData.map(_._3).toArray

def findMaskIdx(ip: Long): Int = {
var low = 0
while(high > low) {
val mid = (low + high)/2
if(masks(mid) > ip) high = mid
else if(masks(mid) < ip) low = mid + 1
else return mid
}
low
}

def findIpIdx(ip: Int): Int = {
val ipLong = ip & 0xFFFFFFFFL
idx -= 1
if(idx < 0) return -1
val m = (0xFFFFFFFF00000000L >>> maskLengths(idx)) & 0xFFFFFFFFL
if((m & masks(idx)) == (m & ipLong)) return idx
return -1
}

println(subnetData.map {
})
println()

println()

def testIP(ipString: String) {
println("ipString = " + ipString)
val ip = ipStringToInt(ipString)
val dataIdx = findIpIdx(ip)
println("dataIdx = " + dataIdx)
if(dataIdx >= 0) {
val data = subnetData(dataIdx)
println("data = " + (subnetData(dataIdx) match {
}))
}
println()
}

testIP("61.232.86.12")
testIP("253.2.100.253")
testIP("253.3.0.0")
``````

That is real easy. Convert the IP to an unsigned 32 integer. Then if (IP1 & Mask) = (IP2 & Mask), where mask is the smaller of the two masks.

``````using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace ConsoleApplication1
{
class Program
{
static void Main(string[] args)
{
string IP1str = "61.232.85.0/25";
cIP IP1 = new cIP(IP1str);
string IP2str = "61.232.86.0/27";
cIP IP2 = new cIP(IP2str);
string IP3str = "61.232.85.0/22";
cIP IP3 = new cIP(IP3str);
string IP4str = "61.232.86.0/22";
cIP IP4 = new cIP(IP4str);

bool sameNet = (new cIP()).InSubnet(IP3str, IP4str);
}
}
public class cIP
{
public UInt32 IP { get; set; }
public string IPHex { get; set; }
public UInt32 mask { get; set; }

public cIP(){}

public cIP(string IPstr)
{
string maskStr = IPstr.Substring(IPstr.LastIndexOf("/") + 1);
IPstr = IPstr.Substring(0,IPstr.LastIndexOf("/"));
string[] IParray = IPstr.Split(new char[] { '.' });
for (int i = 3; i >= 0; i--)
{
IP = IP + (UInt32.Parse(IParray[3 - i]) << (i * 8));
}
IPHex = IP.ToString("x8");
}
public bool InSubnet(string IP1str, string IP2str)
{
bool results = false;
cIP IP1 = new cIP(IP1str);
cIP IP2 = new cIP(IP2str);

{
}
else
{
}

return results;
}
public bool InSubnet(cIP IP1, cIP IP2)
{
bool results = false;
{
}
else
{
}

return results;
}

}

}
​``````

``````using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace ConsoleApplication1
{
class Program
{
static void Main(string[] args)
{
string IP1str = "61.232.85.0/25";
cIP IP1 = new cIP(IP1str);
string IP2str = "61.232.86.0/27";
cIP IP2 = new cIP(IP2str);
string IP3str = "61.232.85.0/22";
cIP IP3 = new cIP(IP3str);
string IP4str = "61.232.86.0/22";
cIP IP4 = new cIP(IP4str);

bool sameNet = (new cIP()).InSubnet(IP3str, IP4str);
}
}
public class cIP
{
public UInt32 IP { get; set; }
public string IPHex { get; set; }
public UInt32 mask { get; set; }

public cIP(){}

public cIP(string IPstr)
{
string maskStr = IPstr.Substring(IPstr.LastIndexOf("/") + 1);
IPstr = IPstr.Substring(0,IPstr.LastIndexOf("/"));
string[] IParray = IPstr.Split(new char[] { '.' });
for (int i = 3; i >= 0; i--)
{
IP = IP + (UInt32.Parse(IParray[3 - i]) << (i * 8));
}
IPHex = IP.ToString("x8");
}
public bool InSubnet(string IP1str, string IP2str)
{
bool results = false;
cIP IP1 = new cIP(IP1str);
cIP IP2 = new cIP(IP2str);

{
}
else
{
}

return results;
}
public bool InSubnet(cIP IP1, cIP IP2)
{
bool results = false;
{
}
else
{