现有如下数据结构:
# Sample Elasticsearch-style aggregation result: each date bucket holds a
# nested "shop" aggregation whose buckets carry sum_qty / sum_amt metrics.
data = {
    "date": {
        "buckets": [
            {
                "key": "2015-01-01",
                "shop": {
                    "buckets": [
                        {
                            "key": "A",
                            "sum_qty": {"value": 5},
                            "sum_amt": {"value": 10},
                        },
                    ],
                },
            },
            {
                "key": "2015-06-01",
                "shop": {
                    "buckets": [
                        {
                            "key": "B",
                            "sum_qty": {"value": 20},
                            "sum_amt": {"value": 100},
                        },
                    ],
                },
            },
        ],
    },
}
期望得到的最终结果:
# Desired flat output: one row per (date, shop) pair with the metric values
# lifted out of their {"value": ...} wrappers.
lst = [
    {'date': '2015-01-01', 'shop': 'A', 'sum_qty': 5, 'sum_amt': 10},
    {'date': '2015-06-01', 'shop': 'B', 'sum_qty': 20, 'sum_amt': 100},
]
# Function-call form so the snippet also parses on Python 3.
print(lst)
寫了一個 class, 結果應該跟你要的一樣:
from collections import abc
class CoolJSON:
    """Flatten a nested ``buckets`` aggregation into a list of flat dicts.

    Each bucket item contributes its ``'key'`` (stored under this level's
    field name) and every ``{'value': ...}`` metric it carries.  A nested
    mapping that itself contains ``'buckets'`` is expanded recursively and
    every resulting row is combined with the fields of the enclosing bucket.

    If one bucket item holds more than one nested ``'buckets'`` mapping,
    only the last one encountered is expanded.
    """

    def __init__(self, key, mapping):
        """Store the field name and a shallow copy of the aggregation.

        key     -- field name under which each bucket's ``'key'`` is stored
        mapping -- the aggregation mapping; must provide a ``'buckets'`` list
        """
        self.key = key
        self.mapping = dict(mapping)

    def collect_bucket_item(self, item):
        """Return the flat rows produced by a single bucket item.

        The result has one row per row of the nested aggregation, or exactly
        one row when the item has no nested ``'buckets'`` mapping.
        """
        own_fields = {}
        # A single empty row stands in for "no nested aggregation", so the
        # item still yields one row made of its own fields.
        child_rows = [{}]
        for name, value in item.items():
            if name == 'key':
                own_fields[self.key] = value
            elif isinstance(value, abc.Mapping):
                if 'buckets' in value:
                    child_rows = CoolJSON(name, value).collect()
                elif 'value' in value:
                    own_fields[name] = value['value']

        rows = []
        for child in child_rows:
            # Start from this level's fields so they come first in the row;
            # setdefault keeps this level's value if a nested row reuses a name.
            row = dict(own_fields)
            for name, value in child.items():
                row.setdefault(name, value)
            rows.append(row)
        return rows

    def collect(self):
        """Return the flat rows for every bucket item, in bucket order.

        Raises KeyError if the mapping has no ``'buckets'`` entry.
        """
        results = []
        for bucket in self.mapping['buckets']:
            results.extend(self.collect_bucket_item(bucket))
        return results
測試:
# Test data: the newer sample that prolife posted in the comments below.
lst = CoolJSON('date', data['date']).collect()
# Print one flattened row per line.
for row in lst:
    print(row)
結果:
{'sum_amt': 3651755, 'sum_qty': 36927, 'date': '2015-02-01', 'shop': 'A'}
{'sum_amt': 436019, 'sum_qty': 12115, 'date': '2015-02-01', 'shop': 'B'}
{'sum_amt': 1310549, 'sum_qty': 8896, 'date': '2015-02-01', 'shop': 'C'}
{'sum_amt': 18620841, 'sum_qty': 212909, 'date': '2015-03-01', 'shop': 'A'}
{'sum_amt': 5105368, 'sum_qty': 40109, 'date': '2015-03-01', 'shop': 'B'}
{'sum_amt': 938102, 'sum_qty': 28156, 'date': '2015-03-01', 'shop': 'C'}
我知道這可能不是你要的答案, 不過我不是很清楚你遇到的資料的狀況, 所以我先拋出一版, 再來看怎麼調整到可以處理你的需求, 以下是一個把這份資料想得很單純的無腦寫法:
# Straightforward flattening: walk every date bucket and every shop bucket
# beneath it, emitting one flat row per (date, shop) pair.  Iterating the
# shop buckets (rather than taking index 0) keeps dates with several shops
# complete and simply yields no row for a date with no shop buckets.
lst = []
for date_bucket in data['date']['buckets']:
    for shop_bucket in date_bucket['shop']['buckets']:
        lst.append({
            'date': date_bucket['key'],
            'shop': shop_bucket['key'],
            'sum_qty': shop_bucket['sum_qty']['value'],
            'sum_amt': shop_bucket['sum_amt']['value'],
        })
print(lst)
我回答過的問題: Python-QA
试试 jsonpath_rw 吧
原版是 http://goessner.net/articles/JsonPath/
jsonpath_rw - https://github.com/kennknowles/python-jsonpath-rw
from jsonpath_rw import parse
# data = {...your data...}
data = []  # placeholder kept from the answer; substitute the real aggregation result
lst = []
match = parse("$.date.buckets[*].key|(shop.buckets)").find(data)
# print [m.value for m in match]
# [
# '2015-01-01',
# {'key': 'A', 'sum_amt': {'value': 10}, 'sum_qty': {'value': 5}},
# '2015-06-01',
# {'key': 'B', 'sum_amt': {'value': 100}, 'sum_qty': {'value': 20}}
# ]

# Parse the per-bucket expression once instead of on every iteration.
bucket_expr = parse("$[*].key|(sum_qty.value)|(sum_amt.value)")

iter_match = iter(match)
for imatch in iter_match:
    idate = imatch.value
    # Lazy grouping: per the sample output above, matches alternate between a
    # date key and its shop buckets, so pull the partner from the same iterator.
    # next() works on both Python 2.6+ and Python 3 (unlike the .next() method).
    imatch = next(iter_match)
    ibuckets = imatch.value
    # print "[idate]", idate
    # print "[ibuckets]", ibuckets
    # [idate] 2015-01-01
    # [ibuckets] {'sum_amt': {'value': 10}, 'key': 'A', 'sum_qty': {'value': 5}}
    # [idate] 2015-06-01
    # [ibuckets] {'sum_amt': {'value': 100}, 'key': 'B', 'sum_qty': {'value': 20}}
    buckets_match = bucket_expr.find(ibuckets)
    # [WARNING]: with several shop buckets per date the matches still need to
    # be expanded; the three-way unpacking below only fits a single bucket.
    ishop, isum_qty, isum_amt = [m.value for m in buckets_match]
    lst.append({
        'date': idate,
        'shop': ishop,
        'sum_qty': isum_qty,
        'sum_amt': isum_amt,
    })
print("[lst]", lst)
elasticsearch 这种层次比较深的数据适合 xxxpath 技术访问
好厉害,题目没看懂,答案也没看懂