| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115 |
- # -*- coding: utf-8 -*-
- """原始任务行 → [{"brand","product","spec"}, ...]"""
- import re
# Embedded raw input text, one record per line. _build_items() splits this
# text with splitlines() and hands every line to _parse_line(), which skips
# blank lines, splits the rest on whitespace and takes the first token as the
# brand. Trailing all-digit tokens and the remark tokens listed in
# _NOTE_ENDINGS get special handling there.
- _RAW = """
- 天和 骨贴膏通 11
- 天和 追风膏 9
- 天和 麝香壮骨膏 5
- 999 益血生胶囊 1
- 999 护肝片 1
- 999 消痔软膏 2
- 999 玻璃酸钠滴眼液 1
- 999 尪痹胶囊 1
- 999 益气清肺颗粒 1
- 999 气滞胃痛片 1
- 999 温经汤颗粒 1
- 999 气滞胃痛颗粒
- 好娃娃 小儿氨酚烷胺颗粒 2
- 澳诺 葡萄糖酸钙锌口服溶液 12
- 易善复 多烯磷脂酰胆碱胶囊 36 5
- 澳诺 维生素D滴剂 3
- 999 硝呋太尔制霉素阴道软胶囊 6粒 3
- 达因 维生素D滴剂 7
- 天和 骨贴膏通 15
- 天和 追风膏 25
- 天和 麝香壮骨膏 25
- 999 益血生胶囊
- 999 护肝片 3
- 999 玻璃酸钠滴眼液 1
- 999 尪痹胶囊 4
- 999 消痔软膏 4
- 999 益气清肺颗粒 1
- 999 气滞胃痛片
- 999 温经汤颗粒 3
- 999 气滞胃痛颗粒 20
- 好娃娃 小儿氨酚烷胺颗粒 7
- 澳诺 葡萄糖酸钙锌口服溶液 31
- 易善复 多烯磷脂酰胆碱胶囊 36 6
- 澳诺 维生素D滴剂 2
- 999 硝呋太尔制霉素阴道软胶囊 6粒 4
- 达因 维生素D滴剂 17
- 天和 骨贴膏通
- 天和 追风膏
- 天和
- 999 益血生胶囊
- 999 玻璃酸钠滴眼液 应取消
- 999 尪痹胶囊
- 999 益气清肺颗粒 应取消
- 999 气滞胃痛片 无
- 999 温经汤颗粒 应取消
- 999 气滞胃痛颗粒
- 好娃娃 小儿氨酚烷胺颗粒
- 澳诺 澳诺葡萄糖酸钙锌口服溶液
- 易善复 多烯磷脂酰胆碱胶囊
- 澳诺 澳诺维生素D滴剂 应取消
- 达因 伊可新维生素AD滴剂
- 水青 水青硝呋太尔制霉素阴道软胶囊 做企业判断
- 999 消痔软膏 1
- 999 护肝片 应取消
- """
- _NOTE_ENDINGS = frozenset({"应取消", "无", "做企业判断"})
- _SPEC_LAST = re.compile(r"^(\d+粒|\d+支|\d+片|\d+g|\d+G|\d+ml|\d+ML|\d+)$")
- def _parse_line(line):
- line = line.strip()
- if not line:
- return None
- parts = line.split()
- brand = parts[0]
- tail = parts[1:]
- if not tail:
- return {"brand": brand, "product": "", "spec": ""}
- while tail and tail[-1].isdigit():
- tail = tail[:-1]
- if not tail:
- return {"brand": brand, "product": "", "spec": ""}
- note = ""
- if tail[-1] in _NOTE_ENDINGS:
- note = tail[-1]
- tail = tail[:-1]
- if not tail:
- return {"brand": brand, "product": "", "spec": note}
- spec_bits = []
- if note:
- spec_bits.append(note)
- prod = tail[:]
- if len(prod) >= 2 and _SPEC_LAST.match(prod[-1]):
- spec_bits.insert(0, prod[-1])
- prod = prod[:-1]
- product = " ".join(prod)
- spec = " ".join(spec_bits) if spec_bits else ""
- return {"brand": brand, "product": product, "spec": spec}
def _build_items(raw_text):
    """Parse every line of *raw_text* and return the resulting rows.

    Each line is passed to ``_parse_line``; falsy results (it returns
    ``None`` for blank lines) are left out. Input order is preserved.
    """
    parsed = (_parse_line(raw_line) for raw_line in raw_text.splitlines())
    return [row for row in parsed if row]
# Parsed rows, built once at import time from the embedded raw text.
ITEMS = _build_items(_RAW)

if __name__ == "__main__":
    # Script mode: print the parsed rows as indented JSON, leaving non-ASCII
    # characters unescaped.
    import json

    rendered = json.dumps(ITEMS, ensure_ascii=False, indent=2)
    print(rendered)
|