From d112a8225a0c99841225f44e5dd60e178dc447d2 Mon Sep 17 00:00:00 2001
From: Julie Pichon <julie.pichon@gmail.com>
Date: Sun, 25 Oct 2009 14:11:40 +0000
Subject: [PATCH 2/2] Fix missing headings when retrieving article
---
infoslicer/processing/HTML_Parser.py | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/infoslicer/processing/HTML_Parser.py b/infoslicer/processing/HTML_Parser.py
index b99e754..adb6eb0 100644
--- a/infoslicer/processing/HTML_Parser.py
+++ b/infoslicer/processing/HTML_Parser.py
@@ -28,8 +28,8 @@ class HTML_Parser:
     #=======================================================================
     # These lists are used at the parsing stage
     root_node = "body"
-    section_separators = ["h3", "h4", "h5"]
-    reference_separators = ["h1", "h2"]
+    section_separators = ["h2", "h3", "h4", "h5"]
+    reference_separators = ["h1"]
     block_elements = ["img", "table", "ol", "ul"]
     #=======================================================================
 