# 02-reusable-code-python/utils/hwpx_generator.py
"""
HWPX 파일 생성 유틸리티 - ZIP 압축된 XML 기반 한글 문서 포맷
@source kcsi-smpa
@extracted 2026-02-15
@version 1.1.0
의존성:
- (없음, 표준 라이브러리만 사용)
사용법:
from utils.hwpx_generator import generate_hwpx
# 기본 문서 생성
path = generate_hwpx(
output_path="report.hwpx",
title="보고서 제목",
content="본문 내용",
)
# 테이블 포함 문서 생성
tables = [{
"title": "테이블 제목",
"col_widths": [1500, 2500],
"rows": [
[{"text": "헤더1", "is_header": True}, {"text": "헤더2", "is_header": True}],
[{"text": "값1"}, {"text": "값2"}],
],
}]
path = generate_hwpx("report.hwpx", "제목", "내용", tables=tables)
"""
import html
import logging
import zipfile
from datetime import datetime
from pathlib import Path
from typing import Any
logger = logging.getLogger(__name__)
# Code points that XML 1.0 forbids in character data: C0 controls other than
# TAB/LF/CR, UTF-16 surrogates, and the non-characters U+FFFE/U+FFFF.
# Mapping each to None makes str.translate() delete it.
_XML_ILLEGAL_CHARS = dict.fromkeys(
    [cp for cp in range(0x20) if cp not in (0x09, 0x0A, 0x0D)]
    + list(range(0xD800, 0xE000))
    + [0xFFFE, 0xFFFF]
)


def escape_xml(text: str) -> str:
    """Return *text* made safe for use as XML text or attribute content.

    Characters that XML 1.0 does not allow at all (they cannot be escaped,
    only removed) are dropped first; then ``&``, ``<``, ``>``, ``"`` and
    ``'`` are replaced by entity/character references.

    Args:
        text: Raw text to embed in the generated XML.

    Returns:
        The sanitised, escaped string.
    """
    return html.escape(text.translate(_XML_ILLEGAL_CHARS), quote=True)
def format_date(date: datetime | None = None) -> str:
"""날짜 포맷 (한국어)"""
if date is None:
date = datetime.now()
return date.strftime("%Y-%m-%d %H:%M")
# Static templates for the fixed entries of the HWPX package.
# generate_hwpx() writes each of these verbatim into the output archive.

# Text stored in the archive's "mimetype" entry.
MIMETYPE = "application/hwp+zip"
# Text stored in the archive's "version.xml" entry.
VERSION_XML = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<hh:version xmlns:hh="urn:hancom:hwp:version:0.1">
<hh:application name="Hancom Office" version="11.0"/>
<hh:document version="1.1"/>
</hh:version>"""
# Text stored in the archive's "settings.xml" entry.
# NOTE(review): "carrtPos" may be a misspelling of "caretPos" - confirm
# against the HWPX specification before changing the literal.
SETTINGS_XML = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<hs:settings xmlns:hs="urn:hancom:hwp:settings:0.1">
<hs:carrtPos>0</hs:carrtPos>
</hs:settings>"""
# Text stored in the archive's "META-INF/container.xml" entry; its rootfile
# element points at Contents/content.hpf.
CONTAINER_XML = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="Contents/content.hpf" media-type="application/hwp+xml"/>
</rootfiles>
</container>"""
def generate_manifest_xml() -> str:
    """Build the text of ``META-INF/manifest.xml``.

    Returns:
        An XML document with one ``odf:file-entry`` element per listed
        package member, each on its own line.
    """
    # (full-path, media-type) pairs in the order they appear in the manifest.
    members = (
        ("/", "application/hwp+zip"),
        ("version.xml", "application/xml"),
        ("settings.xml", "application/xml"),
        ("Contents/content.hpf", "application/xml"),
        ("Contents/section0.xml", "application/xml"),
    )
    lines = [
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>',
        '<odf:manifest xmlns:odf="urn:oasis:names:tc:opendocument:xmlns:manifest:1.0">',
    ]
    lines.extend(
        f'<odf:file-entry odf:full-path="{path}" odf:media-type="{media_type}"/>'
        for path, media_type in members
    )
    lines.append("</odf:manifest>")
    return "\n".join(lines)
def generate_content_hpf() -> str:
    """Build the text of ``Contents/content.hpf``.

    The document is fixed: a head with two font-face declarations (identical
    apart from their ``name``), one character property, one paragraph
    property, and a body referencing ``Contents/section0.xml``.

    Returns:
        The complete XML document as a string.
    """
    # The two font faces share every line except the name attribute.
    font_faces = "\n".join(
        f'<hpf:fontface name="{face_name}" type="ttf">\n'
        "<hpf:font>\n"
        '<hpf:typeinfo familyType="2" weight="4" serif="0" proportion="0"/>\n'
        '<hpf:substitutFont name="함초롬돋움"/>\n'
        "</hpf:font>\n"
        "</hpf:fontface>"
        for face_name in ("한글", "영문")
    )
    return f"""<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<hpf:package xmlns:hpf="urn:hancom:hwp:hpf:0.1">
<hpf:compatibledocument target="hwp7.0"/>
<hpf:head>
<hpf:beginnum page="1" footnote="1" endnote="1" figure="1" table="1" equation="1"/>
<hpf:reflist>
<hpf:fontfaces>
{font_faces}
</hpf:fontfaces>
<hpf:borderfills/>
<hpf:charprops>
<hpf:charprop height="1000" textcolor="0" shadecolor="4294967295" usekern="1" bold="0" italic="0"/>
</hpf:charprops>
<hpf:paraprops>
<hpf:paraprop align="0" vertalign="0" linewrap="0" autobreak="1"/>
</hpf:paraprops>
<hpf:styles/>
<hpf:memoes/>
</hpf:reflist>
</hpf:head>
<hpf:body>
<hpf:section href="Contents/section0.xml"/>
</hpf:body>
</hpf:package>"""
def text_to_paragraph(text: str, bold: bool = False, font_size: int = 1000) -> str:
    """Convert plain text into a sequence of HWPX paragraph elements.

    Each line of *text* becomes one ``<hp:p>`` element. Empty lines are
    rendered as a paragraph containing a single space.

    Args:
        text: Text to convert; may contain ``\\n``, ``\\r\\n`` or ``\\r`` line
            breaks.
        bold: When true, a ``<hp:b/>`` marker is added to every run.
        font_size: Value written to the ``val`` attribute of ``<hp:sz>``.

    Returns:
        The concatenated paragraph XML. Every paragraph starts with a
        newline character.
    """
    # Normalise CRLF / CR line endings first; splitting on "\n" alone would
    # leave a stray carriage return at the end of every text run.
    lines = text.replace("\r\n", "\n").replace("\r", "\n").split("\n")
    # Loop-invariant, so computed once.
    bold_tag = "<hp:b/>" if bold else ""

    paragraphs = []
    for line in lines:
        run_text = escape_xml(line) if line else " "
        paragraphs.append(
            "\n<hp:p>\n"
            "<hp:run>\n"
            f'<hp:rPr><hp:sz val="{font_size}"/>{bold_tag}</hp:rPr>\n'
            f"<hp:t>{run_text}</hp:t>\n"
            "</hp:run>\n"
            "</hp:p>"
        )
    return "".join(paragraphs)
def create_table_row(cells: list[dict[str, Any]]) -> str:
    """Render one table row (``<hp:tr>``) from a list of cell descriptions.

    Args:
        cells: One dict per cell. Recognised keys:
            ``text`` (cell content, default ``""``; non-string values are
            converted with ``str()`` and ``None`` is rendered as empty),
            ``is_header`` (adds shading and bold, default ``False``),
            ``colspan`` (value of the ``colspan`` attribute, default ``1``).

    Returns:
        The row XML. An empty *cells* list yields ``<hp:tr></hp:tr>``.
    """
    cells_xml = []
    for cell in cells:
        raw_text = cell.get("text", "")
        # Cell values are typed Any and are often numbers; the escaping
        # helper only accepts str, so coerce here instead of crashing.
        text = "" if raw_text is None else str(raw_text)
        is_header = cell.get("is_header", False)
        # Escaped because the value is interpolated into an attribute.
        colspan = escape_xml(str(cell.get("colspan", 1)))
        header_style = '<hp:shd color="15132390"/>' if is_header else ""
        bold_tag = "<hp:b/>" if is_header else ""
        cells_xml.append(f"""
<hp:tc colspan="{colspan}">
<hp:tcPr>
<hp:cellMargin left="50" right="50" top="30" bottom="30"/>
{header_style}
<hp:tcBorders>
<hp:left type="single" width="2"/>
<hp:right type="single" width="2"/>
<hp:top type="single" width="2"/>
<hp:bottom type="single" width="2"/>
</hp:tcBorders>
</hp:tcPr>
<hp:p>
<hp:run>
<hp:rPr><hp:sz val="900"/>{bold_tag}</hp:rPr>
<hp:t>{escape_xml(text)}</hp:t>
</hp:run>
</hp:p>
</hp:tc>""")
    return f"<hp:tr>{''.join(cells_xml)}</hp:tr>"
def create_table(
    rows: list[list[dict[str, Any]]],
    col_widths: list[int],
) -> str:
    """Render a complete table element (``<hp:tbl>``).

    Args:
        rows: Row data; each row is a list of cell dicts as accepted by
            ``create_table_row``.
        col_widths: One width per grid column, emitted as ``<hp:gridCol>``
            elements in the given order.

    Returns:
        The table XML: table properties, the column grid, then all rows.
    """
    grid_xml = "".join('<hp:gridCol width="{}"/>'.format(width) for width in col_widths)
    body_xml = "".join(create_table_row(row) for row in rows)
    pieces = (
        "<hp:tbl>",
        "<hp:tblPr>",
        '<hp:tblMargin left="50" right="50" top="0" bottom="0"/>',
        "</hp:tblPr>",
        "<hp:tblGrid>",
        grid_xml,
        "</hp:tblGrid>",
        body_xml,
        "</hp:tbl>",
    )
    return "\n".join(pieces)
def generate_section_xml(
    title: str,
    content: str,
    tables: list[dict[str, Any]] | None = None,
    header_org: str = "KCSI 통계 보고서",
) -> str:
    """Build the XML for the document's single body section.

    Layout, top to bottom: centred organisation header, centred title, a
    timestamp line, the body text, the optional tables (each preceded by a
    bold caption and followed by a blank paragraph), and a centred footer.

    Args:
        title: Document title.
        content: Body text; each line becomes its own paragraph.
        tables: Optional table descriptions. Each dict may contain
            ``title``, ``rows`` and ``col_widths``; when ``col_widths`` is
            missing, 1800 is used for every cell of the first row.
        header_org: Organisation name shown above the title.

    Returns:
        The section XML document as a string.
    """
    spacer = '<hp:p><hp:run><hp:t> </hp:t></hp:run></hp:p>'

    def centered(size: int, escaped_text: str, bold_tag: str) -> str:
        # One centre-aligned paragraph holding a single run.
        return "\n".join((
            "<hp:p>",
            '<hp:pPr><hp:align val="center"/></hp:pPr>',
            "<hp:run>",
            f'<hp:rPr><hp:sz val="{size}"/>{bold_tag}</hp:rPr>',
            f"<hp:t>{escaped_text}</hp:t>",
            "</hp:run>",
            "</hp:p>",
        ))

    def table_block(table_data: dict[str, Any]) -> str:
        # Caption paragraph + table + trailing blank paragraph.
        caption = table_data.get("title", "")
        rows = table_data.get("rows", [])
        fallback_widths = [1800] * len(rows[0]) if rows else []
        col_widths = table_data.get("col_widths", fallback_widths)
        return (
            text_to_paragraph(caption, bold=True, font_size=1100)
            + create_table(rows, col_widths)
            + spacer
        )

    print_date = format_date()
    tables_section = "".join(table_block(entry) for entry in tables) if tables else ""

    parts = [
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>',
        '<hs:sec xmlns:hs="urn:hancom:hwp:section:0.1" xmlns:hp="urn:hancom:hwp:paragraph:0.1">',
        centered(1600, escape_xml(header_org), "<hp:b/>"),
        centered(1400, escape_xml(title), "<hp:b/>"),
        spacer,
        "\n".join((
            "<hp:p>",
            "<hp:run>",
            '<hp:rPr><hp:sz val="900"/></hp:rPr>',
            f"<hp:t>출력일시: {print_date}</hp:t>",
            "</hp:run>",
            "</hp:p>",
        )),
        spacer,
        text_to_paragraph(content, font_size=950),
        spacer,
        tables_section,
        spacer,
        centered(800, "본 문서는 자동 생성되었습니다.", ""),
        "</hs:sec>",
    ]
    return "\n".join(parts)
def generate_hwpx(
    output_path: str | Path,
    title: str,
    content: str,
    tables: list[dict[str, Any]] | None = None,
    header_org: str = "KCSI 통계 보고서",
) -> Path:
    """Write an HWPX file (a ZIP archive of XML parts) to disk.

    Args:
        output_path: Destination path. If its suffix is not ``.hwpx``
            (case-insensitive), the suffix is replaced with ``.hwpx``.
        title: Document title.
        content: Body text.
        tables: Optional table descriptions, passed through to
            ``generate_section_xml``.
        header_org: Organisation name shown above the title.

    Returns:
        The path of the file that was written.

    Raises:
        OSError: If the destination directory or file cannot be created.
    """
    output_path = Path(output_path)
    if output_path.suffix.lower() != ".hwpx":
        output_path = output_path.with_suffix(".hwpx")

    # Render every part before touching the filesystem, so that invalid
    # input raises without leaving a truncated archive behind.
    entries = [
        ("version.xml", VERSION_XML),
        ("settings.xml", SETTINGS_XML),
        ("META-INF/container.xml", CONTAINER_XML),
        ("META-INF/manifest.xml", generate_manifest_xml()),
        ("Contents/content.hpf", generate_content_hpf()),
        (
            "Contents/section0.xml",
            generate_section_xml(title, content, tables, header_org),
        ),
    ]

    # A missing destination directory is created rather than failing.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zf:
        # The mimetype entry is written first and stored uncompressed.
        zf.writestr("mimetype", MIMETYPE, compress_type=zipfile.ZIP_STORED)
        for member_name, payload in entries:
            zf.writestr(member_name, payload)

    logger.info("HWPX 파일 생성 완료: %s", output_path)
    return output_path
# Usage example: running the module as a script writes a small sample file.
if __name__ == "__main__":
    # Sample data: one header row followed by two data rows.
    header_row = [
        {"text": label, "is_header": True} for label in ("항목", "값1", "값2")
    ]
    data_rows = [
        [{"text": value} for value in record]
        for record in (
            ("데이터1", "100", "200"),
            ("데이터2", "150", "250"),
        )
    ]
    test_tables = [
        {
            "title": "테스트 테이블",
            "col_widths": [1500, 2500, 2500],
            "rows": [header_row, *data_rows],
        }
    ]
    output = generate_hwpx(
        output_path="test_output.hwpx",
        title="테스트 보고서",
        content="이것은 테스트 내용입니다.\n여러 줄로 작성할 수 있습니다.",
        tables=test_tables,
        header_org="KCSI 테스트",
    )
    print(f"생성 완료: {output}")