将 Markdown 转换为 Word 的脚本

admin

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Markdown to Word Converter (简化版)
不依赖外部库的版本，适用于网络受限环境
功能：
1. 将Markdown转换为格式化的纯文本
2. 生成HTML版本（可复制粘贴到Word）
3. 不需要安装任何外部库
"""
import os
import sys
import re
from pathlib import Path
def markdown_to_text(md_content):
    """Convert Markdown source into decorated plain text.

    The input is processed line by line. Headings, bullet items, numbered
    items, fenced code blocks and pipe tables each get a plain-text
    decoration; every other non-empty line is treated as a paragraph and
    passed through ``clean_markdown_formatting``.

    Args:
        md_content: The whole Markdown document as one string.

    Returns:
        The converted document, lines joined with ``'\\n'``.
    """
    text_lines = []
    in_code_block = False

    for raw_line in md_content.split('\n'):
        line = raw_line.strip()

        # Fences are handled first so that code content is never mistaken
        # for headings, lists or tables.
        if line.startswith('```'):
            if in_code_block:
                text_lines.append('【代码块结束】')
                in_code_block = False
            elif len(line) > 6 and line.endswith('```'):
                # Opening and closing fence on the same line.
                code = line.replace('```', '').strip()
                text_lines.append(f"【代码】 {code}")
            else:
                # Plain fence or fence with a language tag such as ```python.
                text_lines.append('【代码块开始】')
                in_code_block = True
            continue

        if in_code_block:
            # Keep code verbatim, including its indentation.
            text_lines.append(raw_line)
            continue

        if not line:
            text_lines.append('')
            continue

        # Headings: the number of leading '#' characters is the level.
        if line.startswith('#'):
            title_text = line.lstrip('#').strip()
            level = len(line) - len(line.lstrip('#'))
            if level == 1:
                text_lines.append('=' * 50)
                text_lines.append(f"【主标题】 {title_text}")
                text_lines.append('=' * 50)
            elif level == 2:
                text_lines.append('-' * 40)
                text_lines.append(f"【二级标题】 {title_text}")
                text_lines.append('-' * 40)
            elif level == 3:
                text_lines.append(f"【三级标题】 {title_text}")
                text_lines.append('-' * 20)
            else:
                text_lines.append(f"{' ' * (level-1)}● {title_text}")
            continue

        # Bullet list items.
        if line.startswith(('- ', '* ', '+ ')):
            list_text = clean_markdown_formatting(line[2:].strip())
            text_lines.append(f" • {list_text}")
            continue

        # Numbered list items: keep the author's own number.
        numbered = re.match(r'^(\d+)\.', line)
        if numbered:
            list_text = re.sub(r'^\d+\.\s*', '', line)
            list_text = clean_markdown_formatting(list_text)
            text_lines.append(f" {numbered.group(1)}. {list_text}")
            continue

        # Pipe tables: the outer pipes delimit the row, so the first and last
        # split fragments are discarded.
        if line.count('|') >= 2:
            cells = [cell.strip() for cell in line.split('|')[1:-1]]
            # Alignment rows such as |---|:-:| carry no content.
            if not all(re.fullmatch(r':?-+:?', cell) for cell in cells):
                text_lines.append(f"【表格】 {' | '.join(cells)}")
            continue

        # Anything else is a paragraph.
        text_lines.append(clean_markdown_formatting(line))

    return '\n'.join(text_lines)
def markdown_to_html(md_content):
    """Convert Markdown source into a standalone, styled HTML document.

    The input is processed line by line. Consecutive bullet items, numbered
    items and table rows are grouped into a single ``<ul>``, ``<ol>`` or
    ``<table>``. A table row that is directly followed by an alignment row
    (``|---|---|``) is rendered with ``<th>`` cells, and the alignment row
    itself is dropped. Fenced code blocks are emitted as one
    ``<pre><code>`` element; a block that is still open at the end of the
    input is flushed rather than discarded.

    Args:
        md_content: The whole Markdown document as one string.

    Returns:
        The HTML document, lines joined with ``'\\n'``.
    """
    lines = md_content.split('\n')
    html_lines = ['''<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Markdown转换结果</title>
<style>
body { font-family: Arial, "Microsoft YaHei", sans-serif; line-height: 1.6; margin: 40px; }
h1 { color: #2c3e50; border-bottom: 3px solid #3498db; padding-bottom: 10px; }
h2 { color: #34495e; border-bottom: 2px solid #bdc3c7; padding-bottom: 8px; }
h3 { color: #7f8c8d; }
ul, ol { margin: 10px 0; padding-left: 30px; }
li { margin: 5px 0; }
code { background-color: #f8f8f8; padding: 2px 5px; border-radius: 3px; }
pre { background-color: #f8f8f8; padding: 15px; border-radius: 5px; overflow-x: auto; }
table { border-collapse: collapse; width: 100%; margin: 20px 0; }
th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
th { background-color: #f2f2f2; }
strong { color: #2c3e50; }
em { color: #e74c3c; }
.warning { color: #e67e22; font-weight: bold; }
</style>
</head>
<body>''']
    in_code_block = False
    code_lines = []
    open_container = None  # 'ul', 'ol', 'table', or None when nothing is open

    def close_container():
        """Emit the closing tag of the currently open list/table, if any."""
        nonlocal open_container
        if open_container is not None:
            html_lines.append(f'</{open_container}>')
            open_container = None

    def ensure_container(tag):
        """Make *tag* the open container, closing a different one first."""
        nonlocal open_container
        if open_container != tag:
            close_container()
            html_lines.append(f'<{tag}>')
            open_container = tag

    def flush_code_block():
        """Emit the buffered code lines as a single <pre><code> element."""
        # Joined into one entry so no newline follows <code>; a newline there
        # would render as an empty first line.
        html_lines.append('<pre><code>' + '\n'.join(code_lines) + '</code></pre>')
        code_lines.clear()

    def table_cells(stripped):
        """Return the cells of a pipe-table row, or [] if it is not a row."""
        if stripped.count('|') < 2:
            return []
        return [cell.strip() for cell in stripped.split('|')[1:-1]]

    def is_separator_row(cells):
        """Tell whether *cells* form an alignment row such as |---|:-:|."""
        return bool(cells) and all(re.fullmatch(r':?-+:?', cell) for cell in cells)

    for index, original_line in enumerate(lines):
        line = original_line.strip()

        # Fenced code blocks.
        if line.startswith('```'):
            if in_code_block:
                flush_code_block()
                in_code_block = False
            else:
                close_container()
                in_code_block = True
            continue

        if in_code_block:
            # Escape but otherwise keep the line verbatim (indentation matters).
            code_lines.append(html_escape(original_line))
            continue

        if not line:
            close_container()
            html_lines.append('<br>')
            continue

        # Headings: HTML only defines h1..h6, so deeper levels are clamped.
        if line.startswith('#'):
            close_container()
            level = min(len(line) - len(line.lstrip('#')), 6)
            title_text = apply_html_formatting(line.lstrip('#').strip())
            html_lines.append(f'<h{level}>{title_text}</h{level}>')
            continue

        # Bullet list items.
        if line.startswith(('- ', '* ', '+ ')):
            ensure_container('ul')
            list_text = apply_html_formatting(line[2:].strip())
            html_lines.append(f'<li>{list_text}</li>')
            continue

        # Numbered list items.
        if re.match(r'^\d+\.', line):
            ensure_container('ol')
            list_text = apply_html_formatting(re.sub(r'^\d+\.\s*', '', line))
            html_lines.append(f'<li>{list_text}</li>')
            continue

        # Pipe tables.
        cells = table_cells(line)
        if cells:
            if open_container == 'table' and is_separator_row(cells):
                continue  # alignment row: no content to render
            starts_table = open_container != 'table'
            ensure_container('table')
            following = lines[index + 1].strip() if index + 1 < len(lines) else ''
            # The first row is a header only when an alignment row follows it.
            cell_tag = 'th' if starts_table and is_separator_row(table_cells(following)) else 'td'
            html_cells = [
                f'<{cell_tag}>{apply_html_formatting(cell)}</{cell_tag}>'
                for cell in cells
            ]
            html_lines.append(f'<tr>{"".join(html_cells)}</tr>')
            continue

        # Anything else is a paragraph.
        close_container()
        html_lines.append(f'<p>{apply_html_formatting(line)}</p>')

    # Do not lose the content of a code block whose closing fence is missing.
    if in_code_block:
        flush_code_block()
    close_container()
    html_lines.append('</body></html>')
    return '\n'.join(html_lines)
def clean_markdown_formatting(text):
    """Replace inline Markdown markup with plain-text equivalents.

    Bold text is wrapped in 【】, italic markers are removed, inline code is
    wrapped as 【代码：...】, links are reduced to their link text, and the
    warning emoji is replaced by 【警告】.

    Inline code spans are set aside before the other rules run, so that
    characters such as ``*`` inside backticks are not mistaken for emphasis
    markers.

    Args:
        text: One line (or fragment) of Markdown.

    Returns:
        The text with inline markup replaced.
    """
    code_spans = []

    def stash_code(match):
        # Swap the span for a NUL-delimited index that no later rule matches.
        code_spans.append(match.group(1))
        return f'\x00{len(code_spans) - 1}\x00'

    def restore_code(match):
        position = int(match.group(1))
        if position < len(code_spans):
            return f'【代码：{code_spans[position]}】'
        # Not one of our placeholders (input already held NUL bytes).
        return match.group(0)

    text = re.sub(r'`(.*?)`', stash_code, text)
    # Bold must be handled before italic because '**' also contains '*'.
    text = re.sub(r'\*\*(.*?)\*\*', r'【\1】', text)
    text = re.sub(r'\*(.*?)\*', r'\1', text)
    # Links: keep the visible text, drop the target.
    text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
    text = text.replace('⚠️', '【警告】')
    return re.sub(r'\x00(\d+)\x00', restore_code, text)
def apply_html_formatting(text):
    """Convert inline Markdown markup in *text* to HTML.

    The text is HTML-escaped first, then bold, italic, inline code and links
    are turned into ``<strong>``, ``<em>``, ``<code>`` and ``<a>`` elements,
    and the warning emoji is wrapped in a ``<span class="warning">``.

    Inline code spans are set aside before the other rules run, so that
    characters such as ``*`` inside backticks are not turned into emphasis
    tags inside ``<code>``.

    Args:
        text: One line (or fragment) of Markdown.

    Returns:
        The HTML fragment for that text.
    """
    # Escape before inserting any tags so only our own markup is live HTML.
    text = html_escape(text)

    code_spans = []

    def stash_code(match):
        # Swap the span for a NUL-delimited index that no later rule matches.
        code_spans.append(match.group(1))
        return f'\x00{len(code_spans) - 1}\x00'

    def restore_code(match):
        position = int(match.group(1))
        if position < len(code_spans):
            return f'<code>{code_spans[position]}</code>'
        # Not one of our placeholders (input already held NUL bytes).
        return match.group(0)

    text = re.sub(r'`(.*?)`', stash_code, text)
    # Bold must be handled before italic because '**' also contains '*'.
    text = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', text)
    text = re.sub(r'\*(.*?)\*', r'<em>\1</em>', text)
    text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'<a href="\2">\1</a>', text)
    text = text.replace('⚠️', '<span class="warning">⚠️</span>')
    return re.sub(r'\x00(\d+)\x00', restore_code, text)
def html_escape(text):
    """Escape the characters that are special in HTML text and attributes.

    Args:
        text: Arbitrary text.

    Returns:
        The text with ``&``, ``<``, ``>``, ``"`` and ``'`` replaced by
        character references.
    """
    # '&' must go first; otherwise the ampersands introduced by the other
    # replacements would themselves be escaped a second time.
    return (text.replace('&', '&amp;')
                .replace('<', '&lt;')
                .replace('>', '&gt;')
                .replace('"', '&quot;')
                .replace("'", '&#39;'))
def main():
    """Convert one Markdown file into a .txt and a .html file next to it.

    The input path is taken from the first command-line argument, falling
    back to a hard-coded default path. Progress and errors are printed to
    standard output; the function returns ``None`` in every case.
    """
    # Fallback used when no path is given on the command line.
    default_md_file = r"D:\BaiduSyncdisk\api111\网站结构说明.md"

    def output_path(source, suffix):
        """Return *source* with *suffix*, never equal to *source* itself."""
        candidate = source.with_suffix(suffix)
        if candidate == source:
            # The input already ends in this suffix; append instead of
            # replacing so the input file is not overwritten.
            candidate = source.with_name(source.name + suffix)
        return candidate

    md_file = Path(sys.argv[1]) if len(sys.argv) > 1 else Path(default_md_file)

    if not md_file.exists():
        print(f"错误: 文件不存在 - {md_file}")
        print("请确保文件路径正确，或者将文件路径作为参数传递给脚本")
        print(f"用法: python {sys.argv[0]} <markdown文件路径>")
        return

    print(f"📁 输入文件: {md_file}")
    print("-" * 50)

    try:
        with open(md_file, 'r', encoding='utf-8') as f:
            md_content = f.read()
        print("✅ 文件读取成功")

        # Plain-text version.
        text_output = markdown_to_text(md_content)
        text_file = output_path(md_file, '.txt')
        with open(text_file, 'w', encoding='utf-8') as f:
            f.write(text_output)
        print(f"📄 纯文本版本已生成: {text_file}")

        # HTML version.
        html_output = markdown_to_html(md_content)
        html_file = output_path(md_file, '.html')
        with open(html_file, 'w', encoding='utf-8') as f:
            f.write(html_output)
        print(f"🌐 HTML版本已生成: {html_file}")

        print("\n📋 使用说明:")
        print("1. 纯文本版本(.txt): 可以直接复制粘贴到任何文档")
        print("2. HTML版本(.html): 用浏览器打开，然后复制粘贴到Word保持格式")
        print(" - 在浏览器中打开生成的HTML文件")
        print(" - 按Ctrl+A全选，然后Ctrl+C复制")
        print(" - 在Word中按Ctrl+V粘贴，格式会自动保留")
        print("\n✅ 转换完成！生成了2个文件:")
        print(f" 📄 {text_file}")
        print(f" 🌐 {html_file}")
    except Exception as e:
        # Top-level boundary of the script: report the failure instead of
        # showing a traceback to the user.
        print(f"❌ 转换失败: {e}")
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

复制代码

将 Markdown 转换为 Word 的脚本

浏览过的版块

admin LV9