"The government solution to a problem is usually as bad as the problem." -- Milton Friedman
Note: Updated 11/20/05 See note at bottom.
What with Google Maps, MSN Virtual Earth, and now Yahoo Maps getting into the picture, there's been a lot of current interest in geocoding. The word geocoding has many definitions, depending on the scientific domain one is in, but in this case, it means "The activity of defining the position of geographical objects relative to a standard reference grid." In other words, from a mapping point of view, its latitude and longitude map coordinates.
Many sites use the IP2Country arrangement to help keep track of where their visitors are coming from, but only the most expensive of these databases actually provide a central geocoded point of latitude and longitude for each country. Almost all the databases I have seen rely on file-based lookups or require you to put the data into a database.
My approach here is different. I am building on an earlier technique I highlighted on this site some time ago to embed the entire lookup database into the library assembly as an embedded, compressed resource. This is then extracted, decompressed in memory, and used to populate an ADO.NET DataTable at runtime in order to perform lightning-fast lookups in memory. In addition, the DataTables are cached so that this process only needs to occur the first time the library is used. The resulting library, which contains a database of over 76,000 rows of geocoded IP Address ranges and country codes, is only about 500KB - well within the tolerance range of what could be called a "lightweight", fast-loading .NET utility library!
The first step to produce a library of this type is to find the data. I found an IP Address to Country database in MS Access that suited the bill. You can find a number of these around for free; you just have to be a GG ("Good Googler"). The other part was finding a list of country codes with the accompanying central latitudes and longitudes for each country so they can be located with one of the popular mapping APIs. There was no point in merging these two "tables" since the IP Address range table has multiple rows for each country - so I created two separate resources.
I compressed ("zipped") the IP Address to Country table using ICSharpCode Zip library, saved it in the solution as "IP2Country.dat", and marked it as an embedded resource. The other file was so small, I didn't even bother to compress it, and saved it as "CountryLatLong.txt".
Next, I have some code that will extract a resource from an assembly and optionally decompress it using the ICSharpCode Zip library. In order to save space, I only include the portions of the library needed to decompress a zipped file.
Finally, I have some code that takes these resources and, through easy-to-understand string manipulation, creates DataTables from them and caches them for re-use. Decompressed resources are simply pipe (|)-delimited rows of data that are split using the pipe character as the delimiter. After that there is a simple routine that takes a dotted-quad format IP address and converts it to type long for lookup in the datatable, which returns the two-character country code. This is then used for a second lookup in the other table to return the latitude and longitude of the country, and finally a third quick lookup to get the country's "friendly name". The results are stored in public properties of the library for easy access by the caller.
Now that we've got the basics, let's look at the actual code:
using System;
using System.Collections;
using System.Data;
using System.Diagnostics;
using System.IO;
using System.Net;
using System.Reflection;
using System.Text;
using System.Web;
using ICSharpCode.SharpZipLib.Zip.Compression.Streams;
namespace Ip2CountryLib
{
public class CountryLookup
{
// Raw text of the "IP2CountryLib.IP2Country.dat" resource; assigned by the constructor when it builds IP2CountryTable.
private string theCountries = String.Empty;
// Raw text of the "IP2CountryLib.CountryLatLong.txt" resource; assigned by the constructor when it builds CountryLatLongTable.
private string theLatLong = String.Empty;
// Country codes. The constructor pairs countryCodes[i] with countryNames[i], so the two arrays
// must stay the same length and in the same order.
// NOTE(review): the literal below is abbreviated ("....") in this listing.
private static string[] countryCodes =
{
"AP", "EU", "AD", "AE", "AF" ....
};
// Display names, index-aligned with countryCodes.
// NOTE(review): the literal below is abbreviated ("...") in this listing.
private static string[] countryNames =
{
"Asia/Pacific Region", "Europe", "Andorra", "United Arab Emirates", "Afghanistan", ...
};
// Maps a country code to its display name; filled from the two arrays above in the constructor.
private Hashtable htCountryNames = new Hashtable();
// IP range table; the constructor gives it the columns BeginIp, EndIp and CountryCode.
private DataTable IP2CountryTable = new DataTable();
// Per-country coordinates; the constructor gives it the columns CountryCode, Latitude and Longitude.
private DataTable CountryLatLongTable = new DataTable();
// Results written by FindCountryByDottedIp and exposed through the public properties.
private string foundCountry;
private string foundCountryLong;
private decimal latitude;
private decimal longitude;
/// <summary>
/// Display name of the country found by the last call to FindCountryByDottedIp.
/// </summary>
public string FoundCountryLong
{
    get
    {
        return this.foundCountryLong;
    }
    set
    {
        this.foundCountryLong = value;
    }
}

/// <summary>
/// Latitude stored for the country found by the last call to FindCountryByDottedIp.
/// </summary>
public Decimal Latitude
{
    get
    {
        return this.latitude;
    }
    set
    {
        this.latitude = value;
    }
}

/// <summary>
/// Longitude stored for the country found by the last call to FindCountryByDottedIp.
/// </summary>
public Decimal Longitude
{
    get
    {
        return this.longitude;
    }
    set
    {
        this.longitude = value;
    }
}

/// <summary>
/// Country code (CountryCode column of the IP range table) found by the last call to
/// FindCountryByDottedIp. Read-only.
/// </summary>
public string FoundCountry
{
    get
    {
        return this.foundCountry;
    }
}
/// <summary>
/// Converts an IPv4 address to its numeric form, with the first octet as the most
/// significant byte. This is the value compared against the BeginIp/EndIp range columns.
/// </summary>
/// <param name="addr">The IPv4 address to convert.</param>
/// <returns>The address as a non-negative number; for example 1.2.3.4 yields 16909060.</returns>
/// <exception cref="ArgumentNullException"><paramref name="addr"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="addr"/> is not a 4-byte (IPv4) address.</exception>
private long addrToNum(IPAddress addr)
{
    if (addr == null)
    {
        throw new ArgumentNullException("addr");
    }

    // GetAddressBytes returns the octets in network order on every platform. The obsolete
    // IPAddress.Address property combined with BitConverter only produced that order on
    // little-endian hosts.
    byte[] octets = addr.GetAddressBytes();
    if (octets.Length != 4)
    {
        throw new ArgumentException("Only IPv4 addresses are supported.", "addr");
    }

    long ipnum = 0;
    for (int i = 0; i < octets.Length; ++i)
    {
        // Shift in one octet at a time; working in a long keeps the result non-negative.
        ipnum = (ipnum << 8) | octets[i];
    }
    return ipnum;
}
/// <summary>
/// Looks up the country whose IP range contains the given address and stores the result in
/// FoundCountry, FoundCountryLong, Latitude and Longitude.
/// </summary>
/// <remarks>
/// All four result values are cleared at the start of every call. After a lookup that matches
/// no range, FoundCountry and FoundCountryLong are null and Latitude/Longitude are 0.
/// </remarks>
/// <param name="dottedQuadIp">The address to look up, in dotted-quad notation (e.g. "1.2.3.4").</param>
/// <exception cref="ArgumentNullException"><paramref name="dottedQuadIp"/> is null.</exception>
/// <exception cref="FormatException"><paramref name="dottedQuadIp"/> is not a valid IP address.</exception>
public void FindCountryByDottedIp(string dottedQuadIp)
{
    // Clear the previous result so a miss is not reported as the prior hit.
    this.foundCountry = null;
    this.foundCountryLong = null;
    this.latitude = 0;
    this.longitude = 0;

    IPAddress addr = IPAddress.Parse(dottedQuadIp);
    long num = addrToNum(addr);

    foreach (DataRow row in IP2CountryTable.Rows)
    {
        if (row.IsNull(0) || row.IsNull(1))
        {
            continue; // incomplete range row; cannot match
        }

        // The range columns are declared as long, so compare as integers, not doubles.
        long beginIp = Convert.ToInt64(row[0]);
        long endIp = Convert.ToInt64(row[1]);
        if (num < beginIp || num > endIp)
        {
            continue;
        }

        this.foundCountry = row[2].ToString();
        this.foundCountryLong = (string) this.htCountryNames[this.foundCountry];

        // Escape quotes so an unexpected code cannot break the filter expression.
        string filter = "CountryCode='" + this.foundCountry.Replace("'", "''") + "'";
        DataRow[] fndLats = this.CountryLatLongTable.Select(filter);
        if (fndLats.Length > 0)
        {
            DataRow fndLat = fndLats[0];
            // Parse with the invariant culture: the embedded data is machine-readable text and
            // must not be interpreted with the server's regional decimal separator.
            if (!fndLat.IsNull(1))
            {
                this.latitude = Convert.ToDecimal(fndLat[1], System.Globalization.CultureInfo.InvariantCulture);
            }
            if (!fndLat.IsNull(2))
            {
                this.longitude = Convert.ToDecimal(fndLat[2], System.Globalization.CultureInfo.InvariantCulture);
            }
        }
        // A country with no coordinate row keeps Latitude/Longitude at 0 instead of throwing.
        break;
    } // end foreach
} //end method
/// <summary>
/// Creates a lookup instance. Fills the country code to name map, then obtains the two lookup
/// tables from the ASP.NET application cache, building them from the embedded resources and
/// caching them when they are not present.
/// </summary>
public CountryLookup()
{
    for (int i = 0; i < CountryLookup.countryCodes.Length; i++)
    {
        this.htCountryNames.Add(CountryLookup.countryCodes[i], CountryLookup.countryNames[i]);
    }

    // HttpRuntime.Cache is the application cache that HttpContext.Current.Cache exposes, but it
    // does not require a current request, so construction no longer fails with a
    // NullReferenceException when HttpContext.Current is null.
    System.Web.Caching.Cache cache = HttpRuntime.Cache;

    // Read each entry exactly once. Checking for null and then indexing again could return null
    // if the entry was evicted between the two reads.
    DataTable latLongTable = cache["CountryLatLongTable"] as DataTable;
    if (latLongTable == null)
    {
        latLongTable = BuildCountryLatLongTable();
        cache["CountryLatLongTable"] = latLongTable;
    }
    this.CountryLatLongTable = latLongTable;

    DataTable ipTable = cache["IP2CountryTable"] as DataTable;
    if (ipTable == null)
    {
        ipTable = BuildIp2CountryTable();
        cache["IP2CountryTable"] = ipTable;
    }
    this.IP2CountryTable = ipTable;
}

// Builds the CountryCode/Latitude/Longitude table from the uncompressed embedded text resource.
private DataTable BuildCountryLatLongTable()
{
    theLatLong = GetDecompressedResourceString("IP2CountryLib.CountryLatLong.txt",false);
    DataTable table = new DataTable();
    table.Columns.Add("CountryCode", typeof (string));
    table.Columns.Add("Latitude", typeof (string));
    table.Columns.Add("Longitude", typeof (string));
    table.Columns["CountryCode"].Unique = true;
    AppendPipeDelimitedRows(table, theLatLong);
    theLatLong = String.Empty; // the raw text is not needed once the table is built
    return table;
}

// Builds the BeginIp/EndIp/CountryCode range table from the compressed embedded resource.
private DataTable BuildIp2CountryTable()
{
    theCountries = GetDecompressedResourceString("IP2CountryLib.IP2Country.dat",true);
    DataTable table = new DataTable();
    table.Columns.Add("BeginIp", typeof (long));
    table.Columns.Add("EndIp", typeof (long));
    table.Columns.Add("CountryCode", typeof (string));
    AppendPipeDelimitedRows(table, theCountries);
    theCountries = String.Empty; // the raw text is not needed once the table is built
    return table;
}

// Adds one row per non-blank line of text, splitting each line on '|', then accepts the changes.
private static void AppendPipeDelimitedRows(DataTable table, string text)
{
    string[] lines = text.Split(new char[] {'\n'});
    for (int i = 0; i < lines.Length; i++)
    {
        string line = lines[i].Replace("\r", ""); // tolerate CRLF line endings in either resource
        if (line.Trim().Length == 0)
        {
            continue; // a blank or trailing line is not a data row
        }
        object[] fields = line.Split(new char[] {'|'});
        table.Rows.Add(fields);
    }
    table.AcceptChanges();
}
/// <summary>
/// Reads an embedded resource from the executing assembly and returns it as text.
/// </summary>
/// <param name="resource">The manifest resource name.</param>
/// <param name="IsCompressed">
/// True to pass the resource bytes through Decompress; false to decode them directly as UTF-8.
/// </param>
/// <returns>The resource content as a string.</returns>
/// <exception cref="ApplicationException">
/// The resource is missing or could not be read or decompressed. The original failure is
/// available as InnerException.
/// </exception>
private string GetDecompressedResourceString(string resource,bool IsCompressed)
{
    Stream stm = null;
    BinaryReader br = null;
    try
    {
        Assembly asm = Assembly.GetExecutingAssembly();
        stm = asm.GetManifestResourceStream(resource);
        if (stm == null)
        {
            // GetManifestResourceStream returns null for an unknown name. Report that
            // directly instead of failing later on a null stream.
            throw new FileNotFoundException("Embedded resource '" + resource + "' was not found in assembly '" + asm.FullName + "'.");
        }

        br = new BinaryReader(stm);
        byte[] bytInput = br.ReadBytes((int) stm.Length);
        if (IsCompressed)
        {
            return Decompress(bytInput);
        }
        return Encoding.UTF8.GetString(bytInput);
    }
    catch (Exception ex)
    {
        // Keep the exception type callers already see, but preserve the cause.
        throw new ApplicationException(ex.Message, ex);
    }
    finally
    {
        // Either may still be null if the failure happened before it was assigned.
        if (br != null)
        {
            br.Close();
        }
        if (stm != null)
        {
            stm.Close();
        }
    }
}
/// <summary>
/// Inflates a compressed byte array and decodes the result as UTF-8 text.
/// </summary>
/// <param name="bytInput">The compressed bytes.</param>
/// <returns>The decompressed content as a string.</returns>
private string Decompress(byte[] bytInput)
{
    byte[] buffer = new byte[4096];
    MemoryStream inflated = new MemoryStream();
    Stream s2 = new InflaterInputStream(new MemoryStream(bytInput));
    try
    {
        int size;
        while ((size = s2.Read(buffer, 0, buffer.Length)) > 0)
        {
            inflated.Write(buffer, 0, size);
        }

        // Decode once over the complete payload. Decoding each 4096-byte chunk separately
        // corrupts any multi-byte UTF-8 character that straddles a chunk boundary, and
        // appending to a string per chunk is quadratic in the output size.
        return Encoding.UTF8.GetString(inflated.GetBuffer(), 0, (int) inflated.Length);
    }
    finally
    {
        // Close the inflater stream even when reading fails.
        s2.Close();
    }
}
}
} |
|