首页 > 大神们,求个算法

大神们,求个算法

现有如下数据结构:

# Sample nested aggregation result: each outer "date" bucket holds a "shop"
# aggregation whose buckets carry the sum_qty / sum_amt metrics under "value".
data = {
    "date": {
        "buckets": [
            {
                "key": "2015-01-01",
                "shop": {
                    "buckets": [
                        {"key": "A", "sum_qty": {"value": 5}, "sum_amt": {"value": 10}},
                    ],
                },
            },
            {
                "key": "2015-06-01",
                "shop": {
                    "buckets": [
                        {"key": "B", "sum_qty": {"value": 20}, "sum_amt": {"value": 100}},
                    ],
                },
            },
        ],
    },
}

最终结果:

# Desired output: one flat record per (date, shop) pair, with each metric's
# "value" lifted to a top-level key.
lst = [
    {'date': '2015-01-01', 'shop': 'A', 'sum_qty': 5, 'sum_amt': 10},
    {'date': '2015-06-01', 'shop': 'B', 'sum_qty': 20, 'sum_amt': 100},
]
# Call form of print: same output on Python 2, and valid syntax on Python 3.
print(lst)

寫了一個 class, 結果應該跟你要的一樣:

from collections import abc

class CoolJSON:
    """Flatten a nested ``buckets`` aggregation mapping into flat row dicts.

    Each bucket item contributes its ``'key'`` entry (stored under this
    instance's ``key`` name), every ``{'value': ...}`` metric it holds, and
    the rows produced recursively by any nested ``{'buckets': [...]}``
    aggregation it contains.
    """

    def __init__(self, key, mapping):
        """Store the aggregation name and a shallow copy of its mapping.

        Args:
            key: Name under which each bucket's ``'key'`` value is reported.
            mapping: Mapping that contains a ``'buckets'`` list.
        """
        self.key = key
        self.mapping = dict(mapping)

    def collect_bucket_item(self, item):
        """Flatten a single bucket item into a list of row dicts.

        Args:
            item: Mapping for one bucket; may hold ``'key'``, metric
                mappings (``{'value': ...}``) and nested aggregations
                (``{'buckets': [...]}``). Other entries are ignored.

        Returns:
            A list of dicts. Without nested aggregations it has exactly one
            row; with nested aggregations it has one row per combination of
            nested rows, and is empty if any nested aggregation is empty.
        """
        fields = {}
        rows = [{}]

        for name, value in item.items():
            if name == 'key':
                fields[self.key] = value
            elif isinstance(value, abc.Mapping):
                if 'buckets' in value:
                    nested = CoolJSON(name, value).collect()
                    # Combine with rows gathered so far instead of replacing
                    # them, so sibling aggregations are not silently dropped.
                    combined_rows = []
                    for row in rows:
                        for nested_row in nested:
                            combined = dict(row)
                            combined.update(nested_row)
                            combined_rows.append(combined)
                    rows = combined_rows
                elif 'value' in value:
                    fields[name] = value['value']

        # Fields of this bucket are applied last, so they take precedence
        # over same-named keys coming from nested rows.
        for row in rows:
            row.update(fields)

        return rows

    def collect(self):
        """Return the flattened rows of every item in ``mapping['buckets']``.

        Raises:
            KeyError: If the stored mapping has no ``'buckets'`` entry.
        """
        results = []
        for bucket in self.mapping['buckets']:
            results.extend(self.collect_bucket_item(bucket))
        return results

測試:

# Test data: the newer sample that prolife posted in the comments below.

collector = CoolJSON('date', data['date'])
lst = collector.collect()

# Print one flattened row per line.
for row in lst:
    print(row)

結果:

{'sum_amt': 3651755, 'sum_qty': 36927, 'date': '2015-02-01', 'shop': 'A'}
{'sum_amt': 436019, 'sum_qty': 12115, 'date': '2015-02-01', 'shop': 'B'}
{'sum_amt': 1310549, 'sum_qty': 8896, 'date': '2015-02-01', 'shop': 'C'}
{'sum_amt': 18620841, 'sum_qty': 212909, 'date': '2015-03-01', 'shop': 'A'}
{'sum_amt': 5105368, 'sum_qty': 40109, 'date': '2015-03-01', 'shop': 'B'}
{'sum_amt': 938102, 'sum_qty': 28156, 'date': '2015-03-01', 'shop': 'C'}

我知道這可能不是你要的答案, 不過我不是很清楚你遇到的資料的狀況, 所以我先拋出一版, 再來看怎麼調整到可以處理你的需求, 以下是一個把這份資料想得很單純的無腦寫法:

# Flatten data['date']['buckets'] into one record per (date, shop) pair.
lst = []

for date_bucket in data['date']['buckets']:
    # Walk every shop bucket instead of only index 0, so dates with several
    # shops are fully reported and a date with no shops yields no row
    # (rather than raising IndexError).
    for shop_bucket in date_bucket['shop']['buckets']:
        lst.append({
            'date': date_bucket['key'],
            'shop': shop_bucket['key'],
            'sum_qty': shop_bucket['sum_qty']['value'],
            'sum_amt': shop_bucket['sum_amt']['value'],
        })

print(lst)

我回答過的問題: Python-QA


试试 jsonpath_rw 吧
JSONPath 原版说明: http://goessner.net/articles/JsonPath/
jsonpath_rw - https://github.com/kennknowles/python-jsonpath-rw

from jsonpath_rw import parse


# data = {...your data...}
data = []
lst = []

# Parsed once up front: the per-shop expression is the same for every date.
shop_fields = parse("$[*].key|(sum_qty.value)|(sum_amt.value)")

match = parse("$.date.buckets[*].key|(shop.buckets)").find(data)
# Sample of [m.value for m in match] as recorded by the original author:
# [
#     '2015-01-01',
#     {'key': 'A', 'sum_amt': {'value': 10}, 'sum_qty': {'value': 5}},
#     '2015-06-01',
#     {'key': 'B', 'sum_amt': {'value': 100}, 'sum_qty': {'value': 20}}
# ]
iter_match = iter(match)
for imatch in iter_match:
    idate = imatch.value
    # Lazy grouping: per the sample above, matches alternate between a date
    # key and its shop buckets, so pull the partner match from the same
    # iterator. next() is used (not .next()) so this runs on Python 2 and 3.
    imatch = next(iter_match)
    ibuckets = imatch.value
    # Recorded values per iteration:
    # [idate] 2015-01-01
    # [ibuckets] {'sum_amt': {'value': 10}, 'key': 'A', 'sum_qty': {'value': 5}}
    # [idate] 2015-06-01
    # [ibuckets] {'sum_amt': {'value': 100}, 'key': 'B', 'sum_qty': {'value': 20}}
    # WARNING: this unpacking expects exactly three matches; with several
    # shop buckets per date the matches would still need to be expanded
    # into groups.
    ishop, isum_qty, isum_amt = [m.value for m in shop_fields.find(ibuckets)]
    lst.append({
        'date': idate,
        'shop': ishop,
        'sum_qty': isum_qty,
        'sum_amt': isum_amt,
    })
# Single pre-formatted argument keeps the output identical on Python 2 and 3.
print("[lst] %s" % (lst,))

Elasticsearch 聚合结果这种嵌套层次比较深的数据,适合用 XPath / JSONPath 这类路径查询技术来访问



好厉害,题目没看懂,答案也没看懂

【热门文章】
【热门文章】