#!/usr/bin/python
#coding=utf-8
探索OSM的文件格式(Node对象)。
在线获取OpenStreetMap区域地图数据,转为GeoPandas,并最终转为shp格式文件。
OSM为xml格式,解析文件结构使用强大的requests数据下载包,网页和xml分析神器BeautifulSoup。
**注意:**由于BeautifulSoup将数据读到内存处理,因此不适合大数据量的处理。from bs4 import BeautifulSoup as bs
import requests as req
from pprint import *
直接下载到内存。不推荐使用,因为如果网络中断,需要重新下载。
url = "http://api.openstreetmap.org/api/0.6/map?bbox=11.54,48.14,11.543,48.145"
try:
r = req.get(url)
print(r)
except Exception as ex:
print("Error:",ex)
使用wget -c下载OSM数据,保存到本地文件,然后载入。
!wget -c -O osm_test.osm "http://api.openstreetmap.org/api/0.6/map?bbox=11.54,48.14,11.543,48.145"
--2016-05-04 14:59:47-- http://api.openstreetmap.org/api/0.6/map?bbox=11.54,48.14,11.543,48.145
正在解析主机 api.openstreetmap.org (api.openstreetmap.org)... 193.63.75.99, 193.63.75.100, 193.63.75.103, ...
正在连接 api.openstreetmap.org (api.openstreetmap.org)|193.63.75.99|:80... 已连接。
已发出 HTTP 请求,正在等待回应... 200 OK
长度: 未指定 [text/xml]
正在保存至: “osm_test.osm”
osm_test.osm [ <=> ] 2.67M 87.2KB/s in 39s
2016-05-04 15:00:27 (71.0 KB/s) - “osm_test.osm” 已保存 [2799533]
查看文件列表。可到当前目录去查看内容,由于文件较大,不要在本页面直接打开。
!ls -l -h
总用量 2.8M
-rw-rw-r-- 1 supermap supermap 24K 5月 4 15:02 osm-discovery.ipynb
-rw-rw-r-- 1 supermap supermap 5.0K 4月 24 17:45 osm-overpass.ipynb
-rw-rw-r-- 1 supermap supermap 15K 4月 23 08:23 osm-tag2json.ipynb
-rw-rw-r-- 1 supermap supermap 2.7M 5月 4 15:00 osm_test.osm
直接读取本地文件,获得范围信息。
#bsr = bs(atext,"lxml")
bsr = bs(open("osm_test.osm"),"lxml")
mbr = bsr.find_all('bounds')
print(mbr)
[<bounds maxlat="48.1450000" maxlon="11.5430000" minlat="48.1400000" minlon="11.5400000"></bounds>]
获得osm文件中所有的node对象。
nodelist = bsr.find_all('node')
print("All Nodes:",len(nodelist),", list 0-5:")
pprint(nodelist[0:5])
All Nodes: 1864 , list 0-5:
[<node changeset="34651972" id="398692" lat="48.1452196" lon="11.5414971" timestamp="2015-10-15T10:53:28Z" uid="2290263" user="soemisch" version="20" visible="true">
<tag k="tmc" v="DE:35375"></tag>
</node>,
<node changeset="34904180" id="1956100" lat="48.1434822" lon="11.5487963" timestamp="2015-10-27T14:01:37Z" uid="2385132" user="MENTZ_TU" version="43" visible="true">
<tag k="tmc" v="DE:61453"></tag>
<tag k="TMC:cid_58:tabcd_1:Class" v="Point"></tag>
<tag k="TMC:cid_58:tabcd_1:Direction" v="positive"></tag>
<tag k="TMC:cid_58:tabcd_1:LCLversion" v="9.00"></tag>
<tag k="TMC:cid_58:tabcd_1:LocationCode" v="35356"></tag>
<tag k="TMC:cid_58:tabcd_1:NextLocationCode" v="35357"></tag>
<tag k="TMC:cid_58:tabcd_1:PrevLocationCode" v="35355"></tag>
</node>,
<node changeset="10842011" id="21565151" lat="48.1414994" lon="11.5522715" timestamp="2012-03-01T20:37:08Z" uid="342705" user="KonB" version="4" visible="true"></node>,
<node changeset="9695595" id="21585828" lat="48.1445431" lon="11.5384205" timestamp="2011-10-30T16:47:12Z" uid="534662" user="Q12" version="17" visible="true"></node>,
<node changeset="9883923" id="60300474" lat="48.1406915" lon="11.5502820" timestamp="2011-11-20T13:24:04Z" uid="64536" user="Michael Forster" version="4" visible="true"></node>]
查看node的数据结构。
node = nodelist[0]
print(node)
<node changeset="34651972" id="398692" lat="48.1452196" lon="11.5414971" timestamp="2015-10-15T10:53:28Z" uid="2290263" user="soemisch" version="20" visible="true">
<tag k="tmc" v="DE:35375"></tag>
</node>
node.attrs
{'changeset': '34651972',
'id': '398692',
'lat': '48.1452196',
'lon': '11.5414971',
'timestamp': '2015-10-15T10:53:28Z',
'uid': '2290263',
'user': 'soemisch',
'version': '20',
'visible': 'true'}
解析Node的属性,以K:V存储的值。
for (k,v) in node.attrs.items():
print(k,":",v)
lon : 11.5414971
user : soemisch
timestamp : 2015-10-15T10:53:28Z
id : 398692
uid : 2290263
version : 20
visible : true
changeset : 34651972
lat : 48.1452196
将nodelist转换为Pandas.DataFrame,为了便于显示,只处理了5个node。
import pandas as pd
nodelist2 = []
for node in nodelist[0:10]:
nodelist2.append(node.attrs)
#print(nodelist2)
df = pd.DataFrame(nodelist2)
df
changeset | id | lat | lon | timestamp | uid | user | version | visible | |
---|---|---|---|---|---|---|---|---|---|
0 | 34651972 | 398692 | 48.1452196 | 11.5414971 | 2015-10-15T10:53:28Z | 2290263 | soemisch | 20 | true |
1 | 34904180 | 1956100 | 48.1434822 | 11.5487963 | 2015-10-27T14:01:37Z | 2385132 | MENTZ_TU | 43 | true |
2 | 10842011 | 21565151 | 48.1414994 | 11.5522715 | 2012-03-01T20:37:08Z | 342705 | KonB | 4 | true |
3 | 9695595 | 21585828 | 48.1445431 | 11.5384205 | 2011-10-30T16:47:12Z | 534662 | Q12 | 17 | true |
4 | 9883923 | 60300474 | 48.1406915 | 11.5502820 | 2011-11-20T13:24:04Z | 64536 | Michael Forster | 4 | true |
5 | 2434259 | 256554156 | 48.1431978 | 11.5197388 | 2009-09-10T10:34:54Z | 127922 | w3box | 4 | true |
6 | 11085110 | 256554158 | 48.1432360 | 11.5170168 | 2012-03-24T14:42:27Z | 342705 | KonB | 5 | true |
7 | 9505942 | 256554152 | 48.1420008 | 11.5383182 | 2011-10-08T19:22:24Z | 334153 | Alexander Roalter | 4 | true |
8 | 30794039 | 1423405650 | 48.1398728 | 11.5447444 | 2015-05-04T23:26:30Z | 354141 | Anoniman | 2 | true |
9 | 9212407 | 1423405651 | 48.1399051 | 11.5444005 | 2011-09-04T20:47:20Z | 17085 | cfaerber | 1 | true |
将Pandas.DataFrame转为GeoPandas.DataFrame,点生成为GeoSeries。
注意: 需要安装shapely和geopandas包。在anaconda先运行source activate GISpark,然后安装:
conda install -y -c https://conda.anaconda.org/conda-forge fiona
conda install -y -c https://conda.anaconda.org/conda-forge gdal
conda install -y -c https://conda.anaconda.org/conda-forge geopandas
conda install -y -c https://conda.anaconda.org/conda-forge geojson
from shapely.geometry import (Point, LinearRing, LineString, Polygon, MultiPoint)
from geopandas import GeoSeries, GeoDataFrame
from geopandas.base import GeoPandasBase
def node2pandas(nodelist):
nodelist2 = []
for node in nodelist[0:10]:
nodelist2.append(node.attrs)
df = pd.DataFrame(nodelist2)
return df
def pandas2geopandas(nodelist):
pass
def node2geopandas(nodelist):
df = node2pandas(nodelist)
ps = []
ps0 = [1]
for index, row in df.iterrows():
#print(index,':',row['lat'],'-',row['lon'])
ps0[0] = Point(float(row['lon']),float(row['lat']))
ps.append(ps0[0])
gs = GeoSeries(ps,crs={'init': 'epsg:4326', 'no_defs': True})
geodf = GeoDataFrame({'id' : df["id"],'user' : df["id"],
'lon' : df["lon"],'lat' : df["lat"],
'timestamp' : df["timestamp"],'uid' : df["uid"],'version' : df["version"],
'geometry' : gs
})
return geodf
gdf = node2geopandas(nodelist)
gdf
geometry | id | lat | lon | timestamp | uid | user | version | |
---|---|---|---|---|---|---|---|---|
0 | POINT (11.5414971 48.1452196) | 398692 | 48.1452196 | 11.5414971 | 2015-10-15T10:53:28Z | 2290263 | 398692 | 20 |
1 | POINT (11.5487963 48.1434822) | 1956100 | 48.1434822 | 11.5487963 | 2015-10-27T14:01:37Z | 2385132 | 1956100 | 43 |
2 | POINT (11.5522715 48.1414994) | 21565151 | 48.1414994 | 11.5522715 | 2012-03-01T20:37:08Z | 342705 | 21565151 | 4 |
3 | POINT (11.5384205 48.1445431) | 21585828 | 48.1445431 | 11.5384205 | 2011-10-30T16:47:12Z | 534662 | 21585828 | 17 |
4 | POINT (11.550282 48.1406915) | 60300474 | 48.1406915 | 11.5502820 | 2011-11-20T13:24:04Z | 64536 | 60300474 | 4 |
5 | POINT (11.5197388 48.1431978) | 256554156 | 48.1431978 | 11.5197388 | 2009-09-10T10:34:54Z | 127922 | 256554156 | 4 |
6 | POINT (11.5170168 48.143236) | 256554158 | 48.1432360 | 11.5170168 | 2012-03-24T14:42:27Z | 342705 | 256554158 | 5 |
7 | POINT (11.5383182 48.1420008) | 256554152 | 48.1420008 | 11.5383182 | 2011-10-08T19:22:24Z | 334153 | 256554152 | 4 |
8 | POINT (11.5447444 48.1398728) | 1423405650 | 48.1398728 | 11.5447444 | 2015-05-04T23:26:30Z | 354141 | 1423405650 | 2 |
9 | POINT (11.5444005 48.1399051) | 1423405651 | 48.1399051 | 11.5444005 | 2011-09-04T20:47:20Z | 17085 | 1423405651 | 1 |
保存为shape格式文件。
filename = "osm_test.shp"
gdf.to_file(filename)
查看一下文件列表。
!ls -l -h
总用量 2.8M
-rw-rw-r-- 1 supermap supermap 25K 5月 4 15:17 osm-discovery.ipynb
-rw-rw-r-- 1 supermap supermap 5.0K 4月 24 17:45 osm-overpass.ipynb
-rw-rw-r-- 1 supermap supermap 15K 4月 23 08:23 osm-tag2json.ipynb
-rw-rw-r-- 1 supermap supermap 10 5月 4 15:17 osm_test.cpg
-rw-rw-r-- 1 supermap supermap 5.8K 5月 4 15:17 osm_test.dbf
-rw-rw-r-- 1 supermap supermap 2.7M 5月 4 15:00 osm_test.osm
-rw-rw-r-- 1 supermap supermap 380 5月 4 15:17 osm_test.shp
-rw-rw-r-- 1 supermap supermap 180 5月 4 15:17 osm_test.shx