Commit e1d344fd authored by Sebastian Baltes
Browse files

Remove newlines in headings

parent 4407abd5
import logging
import re
import requests
from lxml import html
......@@ -38,7 +40,7 @@ class Venue(object):
for item in items:
if item.tag == "h2" or item.tag == "h3":
current_heading = item.text
current_heading = re.sub(r'\s+', ' ', item.text) # unify whitespaces (remove newlines)
elif item.tag == "li":
if current_heading == "":
# the following only works for conferences, not for journals
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment