python 列表操作

2012-06-09

python 列表操作求助：13:39:41.990623 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [S] ……

python 列表操作求助
> 13:39:41.990623 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [S]
> 13:39:41.990650 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:41.990652 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016317 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016322 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016324 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016774 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016878 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016886 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016888 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016891 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016894 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.018229 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [P.]
> 13:39:42.372091 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.699621 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [S]
> 13:39:42.710105 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [.]
> 13:39:42.710353 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [P.]
> 13:39:42.721820 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [.]
> 13:39:42.721851 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [S]
> 13:39:42.755950 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [P.]
> 13:39:42.756030 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [P.]
> 13:39:43.137791 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]

数据格式如上
第一步：把 IP 和 Flags 之间的数据（即“源地址.端口 > 目的地址.端口”）相同的行归类到一起。
第二步：以 [S] 为分隔标志，将每一类数据分隔成若干段。
第三步：对分隔开的每一段数据，取时间的最大值和最小值做差，即计算相邻 [S] 之间数据的时间差。
最后输出：IP x.x.x.x > x.x.x.x 时间差

之前某位大牛帮我写过一个类似的脚本，我试着修改，不过还是没搞明白它是怎么把 IP 相同的数据归类到一起的。

下面是之前某位大牛帮忙写的类似脚本：

Python code

#!/usr/bin/python
# encoding: utf-8
"""Report the most frequent time gap between records of each connection.

Input lines are expected to look like
``YYYY-MM-DD HH:MM:SS <src-ip> <tag-ip> <port>``; lines that do not match
``patt`` are ignored.  Records are grouped by ``(src, tag, port)`` and, for
every group, the most common gap (in seconds) between consecutive
timestamps is printed.
"""
import collections
import datetime
import re
import sys

# Verbose-mode pattern: literal whitespace inside it is ignored, so the
# separators between fields are spelled out as ``\s``.
patt = re.compile(r'''
  (?P<dt>\d{4}\-\d{2}\-\d{2}\s\d{2}:\d{2}:\d{2})\s
  (?P<src>\d+(\.\d+){3})\s
  (?P<tag>\d+(\.\d+){3})\s
  (?P<port>\d+)
  ''', re.I | re.U | re.X)


def dataReader(filename):
    """Yield a dict of the named groups of ``patt`` for each matching line.

    Lines of *filename* that do not match ``patt`` are skipped silently.
    """
    with open(filename, 'rt') as handle:
        for ln in handle:
            m = patt.match(ln.strip())
            if m:
                yield m.groupdict()


def s2dt(s, fmt='%Y-%m-%d %H:%M:%S'):
    """Parse the string *s* into a ``datetime`` using the format *fmt*."""
    return datetime.datetime.strptime(s, fmt)


def dataCollector(filename):
    """Group the timestamps found in *filename* by ``(src, tag, port)``.

    Returns a dict mapping each ``(src, tag, port)`` tuple of strings to the
    list of ``datetime`` objects parsed from the matching lines, in file
    order.
    """
    collector = collections.defaultdict(list)
    for d in dataReader(filename):
        collector[(d['src'], d['tag'], d['port'])].append(s2dt(d['dt']))
    # Hand back a plain dict so a lookup of a missing key does not insert it.
    return dict(collector)


def delta(timelist, min_gap=10):
    """Summarise the gaps between consecutive timestamps in *timelist*.

    The timestamps are sorted, the difference in seconds between each
    neighbouring pair is computed, and gaps shorter than *min_gap* seconds
    are discarded.  The remaining gaps are passed to ``countdlist``.

    *timelist* is not modified.  Returns whatever ``countdlist`` returns:
    ``None`` when no gap qualifies (including for empty or single-element
    input), otherwise a ``(count, gap, percent)`` tuple.
    """
    # Work on a sorted copy so the caller's list is left untouched.
    ordered = sorted(timelist)
    gaps = [
        (later - earlier).total_seconds()
        for earlier, later in zip(ordered, ordered[1:])
    ]
    return countdlist([gap for gap in gaps if gap >= min_gap])


def countdlist(dlist):
    """Return the most frequent value of *dlist* and how common it is.

    Returns ``None`` for empty input.  Otherwise returns
    ``(count, value, percent)`` where *count* is the number of occurrences
    of *value*, and *percent* is ``count`` as a share of all items,
    formatted as a string with two decimals.  When several values are
    equally frequent, the largest value wins.
    """
    dlist = list(dlist)
    if not dlist:
        return None
    counts = collections.Counter(dlist)
    # max() over (count, value) pairs reproduces "sort ascending, take last".
    cnt, dur = max((n, d) for d, n in counts.items())
    return cnt, dur, '%.2f' % (100. * cnt / len(dlist))


def main(filename=r'/home/ip.data'):
    """Print one line per connection that has at least one qualifying gap."""
    for category, timelist in dataCollector(filename).items():
        buff = delta(timelist)
        if buff is not None:
            # str.format keeps the output the same on Python 2.7 and 3.
            print('{} {}'.format(category, buff))


if __name__ == '__main__':
    main()

[解决办法]

Python code

#!/usr/bin/env python
"""Print the time between successive SYN ([S]) packets of each flow.

Reads ``data.txt`` and, for every "source => destination" pair, prints the
elapsed time between each ``[S]`` line and the previous ``[S]`` line of the
same pair.
"""
from datetime import datetime, timedelta


def iter_syn_gaps(lines):
    """Yield ``(key, gap)`` for each repeated ``[S]`` line of a flow.

    *lines* is any iterable of text lines.  Each line is split on
    whitespace; fields 1, 3, 5 and 7 are read as time, source, destination
    and flag.  *key* is ``"<source> => <destination>"`` and *gap* is the
    ``timedelta`` since the previous ``[S]`` line with the same key.  The
    first ``[S]`` line of a key yields nothing.

    Lines with fewer than eight fields (for example blank lines) are
    skipped.
    """
    # NOTE(review): these field positions assume one extra leading token
    # before the timestamp (e.g. a ">" quote marker) -- confirm against the
    # real data file.
    last_syn = {}
    for line in lines:
        data = line.split()
        if len(data) < 8:
            continue
        ntime, sip, dip, flag = data[1], data[3], data[5], data[7]
        if flag != '[S]':
            continue
        akey = "%s => %s" % (sip, dip)
        ntime = datetime.strptime(ntime, '%H:%M:%S.%f')
        if akey in last_syn:
            yield akey, ntime - last_syn[akey]
        # Always remember the latest SYN so the next gap is measured from it.
        last_syn[akey] = ntime


def main(path='data.txt'):
    """Print ``<key> <gap>`` for every gap found in the file at *path*."""
    with open(path) as fd:
        for akey, dur in iter_syn_gaps(fd):
            # str.format keeps the output the same on Python 2.7 and 3.
            print('{} {}'.format(akey, dur))


if __name__ == '__main__':
    main()

热点排行

perl python

python 列表操作