最近想从 hexo 迁移到 typecho,但是 typecho 没有一键导入 md 文章的功能,手动导入又很烦,怎么办呢?于是我就想用 Python 写一个自动解析 md 并导入 typecho 的脚本。
于是就开始编码了,首先是用正则表达式提取。hexo 的 md 头部是 YAML 格式的,只要解析 title、date、tags、categories 就行了。这里难解析的是 tags 和 categories:它们可能不止一个值,也可能一个都没有。
对于上面的解析,我采用两个判断,以及捕获异常的方式解析,tags和category的方式有一些不同。实现如下:
1 		# 标题提取
2        title = re.search(r'title: (.*?)\n', s, re.S).group(1)
3        # 时间转化时间截
4        date = re.search(r'date: (.*?)\n', s, re.S).group(1)
5        date = time.strptime(date, "%Y-%m-%d %H:%M:%S")
6        date = int(time.mktime(date))
7        try:
8            if not re.search(r'tags:[ ]*(.*?)\n', s).group(1):
9                if re.search(r'tags:[ ]*\n(.*?)\nca', s, re.S):
10                    items = re.search(r'tags:[ ]*\n(.*?)\nca', s, re.S).group(1)
11                    tags = re.findall(r'- (.*?)\n', items)
12                else:
13                    tags = ''
14            else:
15                tags = re.search(r'tags:[ ]*(.*?)\n', s).group(1)
16        except AttributeError as e:
17            print(e)
18            tags = ''
19
20        try:
21            if not re.search(r'categories:[ ]*(.*?)\n', s).group(1):
22                if re.search(r'categories:[ ]*\n(.*?)\n---', s, re.S):
23                    items = re.search(r'categories:[ ]*\n(.*?)\n---', s, re.S).group(1)
24                    categories = re.findall(r'- (.*?)\n', items)
25                else:
26                    categories = ''
27            else:
28                categories = re.search(r'categories:[ ]*(.*?)\n', s).group(1)
29        except AttributeError as e:
30            print(e)
31            categories = ''
32        # 正文提取
33        post = re.search(r'---\n\n(.*?)$', s, re.S).group(1)
Copy
这里的踩坑点主要是正则中的 \s:它不仅匹配空格,还会匹配换行符等其他空白字符,所以我采用了 [ ]* 来只匹配多个空格。如果匹配不到,re.search 会返回 None,对它调用 .group() 就会抛出 AttributeError,捕获这个异常后让 tags(categories)= '' 就行了。
然后是插表方面,连接之后,批量解析文件,将字段插入表中,这里需要插入三个表,分别是 typecho_metas typecho_contents typecho_relationships
在 typecho_contents 中插入文章内容,在 typecho_metas 中插入分类和标签,在 typecho_relationships 中建立文章与分类、标签的关系。
这里需要为 typecho_metas 表的 name 和 type 字段建立联合唯一索引(UNIQUE KEY),避免重复插入;重复插入时改为执行更新操作,使得 count = count + 1。
def insert_post(self, file):
    """Parse one Hexo Markdown file and insert it into ``typecho_contents``.

    The parsed tuple is also stored on ``self.data`` so that
    ``insert_tags_category`` and ``relationships`` can read it afterwards.

    Args:
        file: File name (not a path) of a post inside the ``_posts``
            directory, e.g. ``"hello-world.md"``.

    Database errors are printed and the transaction is rolled back; nothing
    is re-raised, so one bad post does not stop a batch import.
    """
    # Layout: (title, date, tags, categories, '<!--markdown-->' + post)
    data = self.parse_hexo_md(file)
    self.data = data
    db = self.db
    cur = self.cur

    # st_mtime is already seconds since the epoch; truncating it is enough,
    # there is no need to round-trip through local time.
    modified = int(os.stat(os.path.join('_posts', file)).st_mtime)

    # Strip only a trailing ".md". Splitting on the first ".md" would cut
    # names such as "notes.md.backup.md" down to "notes".
    slug = file[:-len('.md')] if file.endswith('.md') else file

    sql = '''
    INSERT INTO typecho_contents(title,slug, created,modified, text,type,status,allowComment,allowFeed,allowPing,authorId) VALUES (%s,%s,%s,%s,%s,'post','publish',1,1,1,1)
    '''

    try:
        cur.execute(sql, (data[0], slug, data[1], modified, data[4]))
        db.commit()
    except Exception as e:
        # Best-effort import: report the failure and keep going.
        print(e)
        db.rollback()

def insert_tags_category(self):
    """Insert the tags and categories of the current post into ``typecho_metas``.

    Reads ``self.data``, whose layout is
    ``(title, date, tags, categories, '<!--markdown-->' + post)``. ``tags``
    and ``categories`` may each be a list of names, a single name string, or
    an empty string when the post has none.

    The statement relies on a unique key over ``typecho_metas(name, type)``
    (``ALTER TABLE typecho_metas ADD UNIQUE KEY(name,type)``): a name that
    already exists gets ``count = count + 1`` instead of a duplicate row.

    Database errors are printed and rolled back. A failure while importing
    tags does not prevent the categories from being imported.
    """
    data = self.data
    cur = self.cur
    sql = '''
    INSERT INTO typecho_metas(name,slug,type,count) VALUES (%s,%s,%s,1) ON DUPLICATE KEY UPDATE count = count + 1
    '''

    for meta_type, value in (('tag', data[2]), ('category', data[3])):
        # Normalise "one name" and "list of names" to the same shape.
        names = value if isinstance(value, list) else [value]
        try:
            for name in names:
                if name:  # skip '' (no tag/category, or an empty list item)
                    cur.execute(sql, (name, name, meta_type))
            # Commit once per group so a failure mid-list does not leave a
            # half-imported set of names behind.
            self.db.commit()
        except pymysql.DatabaseError as e:
            print(e)
            self.db.rollback()

def relationships(self):
    """Link the current post to its tags and categories.

    Reads ``self.data``, whose layout is
    ``(title, date, tags, categories, '<!--markdown-->' + post)``. The post's
    ``cid`` is looked up by title in ``typecho_contents``; each tag/category
    ``mid`` is looked up by name and type in ``typecho_metas``; one
    ``typecho_relationships`` row is inserted per (cid, mid) pair.

    Tags and categories are handled independently, so a post without tags
    still gets its category links. A name with no matching meta row is
    reported and skipped. Database errors are printed and rolled back.
    """
    db = self.db
    cur = self.cur
    data = self.data
    print('tag = ', data[2], 'type = ', type(data[2]), 'cet = ', data[3])

    select_mid = '''
            SELECT mid FROM typecho_metas WHERE name = %s AND type = %s
        '''
    select_cid = '''
            SELECT cid FROM typecho_contents WHERE title = %s
        '''
    add_relationship = '''
            INSERT INTO typecho_relationships(cid,mid) VALUES (%s,%s)
    '''

    # Resolve the post id first; without it no relationship can be built.
    try:
        cur.execute(select_cid, (data[0],))  # one-element tuple, not a bare scalar
        rows = cur.fetchall()
    except pymysql.DatabaseError as e:
        print(e)
        db.rollback()
        return
    if not rows:
        print('不能建立关系', data[0])
        return
    cid = rows[0][0]

    for meta_type, value in (('tag', data[2]), ('category', data[3])):
        # Normalise "one name" and "list of names" to the same shape.
        names = value if isinstance(value, list) else [value]
        try:
            for name in names:
                if not name:  # '' means the post has no tag/category
                    continue
                cur.execute(select_mid, (name, meta_type))
                found = cur.fetchall()
                if not found:
                    print('不能建立关系', name)
                    continue
                cur.execute(add_relationship, (cid, found[0][0]))
            # Persist the links; without a commit they are only kept if some
            # later statement elsewhere happens to commit.
            db.commit()
        except pymysql.DatabaseError as e:
            print(e)
            db.rollback()
Copy
全部源码见:
欢迎交流

亲亲留个评论再走呗

正在加载评论区...