python split多个分隔符

快乐的老鼠 发表于 2018-8-4 11:28:52

　　1.split不支持同时使用多种字符做分隔符，如果想实现这样的效果，可以用re，例如：
>>> s = 'Hello!This?Is!What?I!Want'　　
>>> s
　　
'Hello!This?Is!What?I!Want'
　　
>>> re.split('[!?]',s)
　　
['Hello', 'This', 'Is', 'What', 'I', 'Want']
　　>>> i = "25 192.168.19.32 192.168.1.30 [2014.09.28 09:01:35]env -i X='() { (a)=>\' bash -c 'echo date'; cat echo"
　　>>> re.split(r'[ \[\]]',i)
　　['25', '192.168.19.32', '192.168.1.30', '', '2014.09.28', '09:01:35', 'env', '-i
　　', "X='()", '{', "(a)=>'", 'bash', '-c', "'echo", "date';", 'cat', 'echo']
　　>>> re.findall(r'[\w.:]+',i)
　　['25', '192.168.19.32', '192.168.1.30', '2014.09.28', '09:01:35', 'env', 'i', 'X
　　', 'a', 'bash', 'c', 'echo', 'date', 'cat', 'echo']
　　>>> re.findall(r'[^]^[\s]+',i)
　　['25', '192.168.19.32', '192.168.1.30', '2014.09.28', '09:01:35', 'env', '-i', "
　　X='()", '{', "(a)=>'", 'bash', '-c', "'echo", "date';", 'cat', 'echo']
　　2.例子
　　test.txt（各字段之间以制表符 \t 分隔）:
　　a	b	c	dstr1
　　astr2	m super	nba	jack
　　jordon
#coding: utf-8　　
import re
　　

　　
def split_on_separators(original, separators):
    """Split *original* on each character of *separators*, in turn.

    The text is split on the first separator, every resulting piece is then
    split on the next separator, and so on. Pieces that are empty or consist
    only of whitespace are discarded.

    Args:
        original: The string to split.
        separators: An iterable of separator strings; passing a plain string
            such as ``"\t\n"`` treats every character as one separator.

    Returns:
        A list of the non-blank pieces, in their original order. If
        *separators* is empty the result is ``[original]`` unchanged.
    """
    # The original note offered a simpler regex-based alternative that may
    # not fit every requirement:
    #   filter(lambda x: x.strip(), re.split(r"[%s]" % separators, original))
    result = [original]
    for sep in separators:
        temp = []
        for r in result:
            # Keep only pieces that still contain non-whitespace text.
            temp.extend(piece for piece in r.split(sep) if piece.strip())
        result = temp
    return result
　　

　　
if __name__ == "__main__":
    # Read the whole sample file; the context manager closes the handle
    # instead of leaving it open until garbage collection.
    with open('test.txt', 'r') as handle:
        line = handle.read()

    # Split on tabs and newlines and show the resulting fields.
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(split_on_separators(line, "\t\n"))

    # Inline example that does not need test.txt:
    # print(split_on_separators("a\tb\nc\tdstr1\nastr2 a\tlisa", "\t\n"))
　　打印结果：
　　['a', 'b', 'c', 'dstr1', 'astr2', 'm super', 'nba', 'jack', 'jordon']
　　————————————————————
for r in result:　　
         temp.extend(filter(lambda x:x.strip(), r.split(sep)))
　　对这句的理解：
>>> a = ' 123\ra\tabc'　　
>>> a.strip()
　　
'123\ra\tabc'
　　
>>> filter(lambda x:x.strip(),' 123\ra\tabc')
　　
'123aabc'
　　

　　

　　
>>> filter(lambda x:x.strip(),'a\tb\nc\tdstr1\nastr2 a\tlisa')
　　
'abcdstr1astr2alisa'
　　
>>> b = filter(lambda x:x.strip(),'')
　　
>>> b
　　
''
　　

　　
>>> temp = []
　　
>>> temp.extend(filter(lambda x:x.strip(),'a'.split('\n\t')))
　　
>>> temp
　　
['a']
　　
>>> temp
　　
['a']
　　
>>> temp.extend(filter(lambda x:x.strip(),'str1'.split('\n\t')))
　　
>>> temp
　　
['a', 'str1']
　　
>>> temp.extend(filter(lambda x:x.strip(),'str2'))
　　
>>> temp
　　
['a', 'str1', 's', 't', 'r', '2']
　　

　　

　　
>>> temp = []
　　
>>> temp.extend(filter(lambda x:x.strip(),' 123\ra\tabc'))
　　
>>> temp
　　
['1', '2', '3', 'a', 'a', 'b', 'c']
　　
>>> temp.extend(filter(lambda x:x.strip(),' 123\ra\tabc'.split('\n\t')))
　　
>>> temp
　　
['1', '2', '3', 'a', 'a', 'b', 'c', ' 123\ra\tabc']
　　——————————————————————————————————
　　上面例子的应用，将含有多列的普通文本写到excel中（本例split分隔符为空格和\n）：
#coding: utf-8　　
import re,xlrd,xlwt
　　

　　
def split_on_separators(original, separators):
    """Split *original* on each character of *separators*, in turn.

    The text is split on the first separator, every resulting piece is then
    split on the next separator, and so on. Pieces that are empty or consist
    only of whitespace are discarded.

    Args:
        original: The string to split.
        separators: An iterable of separator strings; passing a plain string
            such as ``" \n"`` treats every character as one separator.

    Returns:
        A list of the non-blank pieces, in their original order. If
        *separators* is empty the result is ``[original]`` unchanged.
    """
    # The original note offered a simpler regex-based alternative that may
    # not fit every requirement:
    #   filter(lambda x: x.strip(), re.split(r"[%s]" % separators, original))
    result = [original]
    for sep in separators:
        temp = []
        for r in result:
            # Keep only pieces that still contain non-whitespace text.
            temp.extend(piece for piece in r.split(sep) if piece.strip())
        result = temp
    return result
　　

　　
# Read the whole input file; the context manager closes the handle instead
# of leaving it open until garbage collection.
with open('ex.txt', 'r') as handle:
    line = handle.read()

# Break the text into fields on spaces and newlines.
lisa = split_on_separators(line, " \n")
　　

　　

　　
def wexcel(infile, outefile):
    """Write the entries of the module-level list ``lisa`` to a workbook.

    Each entry goes into column 0 of a sheet named ``sheet1``, one entry per
    row, and the workbook is saved to *outefile*.

    Args:
        infile: Not used by this function; the data comes from the
            module-level ``lisa`` list. Kept so existing calls still work.
        outefile: Path the workbook is saved to.
    """
    buf = lisa
    print(buf)

    w = xlwt.Workbook()
    sheet = w.add_sheet('sheet1')
    for i, cell in enumerate(buf):
        print(cell)
        # Write one entry per row. Decoding assumes the entries are UTF-8
        # byte strings, as read from a file under Python 2 -- TODO confirm.
        sheet.write(i, 0, cell.decode('utf8'))
    w.save(outefile)
　　

　　
# Run the export; the workbook is saved as ex.xls.
wexcel('ex.txt','ex.xls')
　　打印结果(ex.txt写到ex.xls中)：

　　ex.txt：
　　/                24%
　　/backup          62%
　　/project          20%
　　memory       26.16%
　　————————————————后续———————————————
　　test.txt：
　　/                24%
　　/backup          62%
　　/project          20%
　　memory       26.16%
　　line = file('D:/python27/test.txt','r').read()
　　>>> line.split('\n')
　　['/                24%', '/backup          62%', '/project          20%'
　　, 'memory       26.16%']
　　>>> line.split()
　　['/', '24%', '/backup', '62%', '/project', '20%', 'memory', '26.16%']
　　>>> filter(lambda x:x.strip(),line.split())
　　['/', '24%', '/backup', '62%', '/project', '20%', 'memory', '26.16%']
　　>>> for i in [line]:
　　... print i.split('\n')
　　...
　　['/                24%', '/backup          62%', '/project          20%'
　　, 'memory       26.16%']
　　>>> for i in [line]:
　　... print filter(lambda x:x.strip(), i.split('\n'))
　　...
　　['/                24%', '/backup          62%', '/project          20%'
　　, 'memory       26.16%']
　　>>> filter(lambda x:x.strip(),line)
　　'/24%/backup62%/project20%memory26.16%'
　　>>> for i in line.split('\n'):
　　... for sep in i.split(' '):
　　...          temp = []
　　...          temp.extend(filter(lambda x:x.strip(),i.split(' ')))
　　... print temp
　　...
　　['/', '24%']
　　['/backup', '62%']
　　['/project', '20%']
　　['memory', '26.16%']
　　>>> for i in line.split('\n'):
　　... for sep in i.split(' '):
　　...          temp = []
　　...          temp.extend(filter(lambda x:x.strip(),sep.split(' ')))
　　... print sep
　　...
　　24%
　　62%
　　20%
　　26.16%

页: [1]

运维网's Archiver

python split多个分隔符