
我有一些已经排好序的数据,看起来像这样(用 ssgrep 从 MS Excel 导出到命令行):

2012-06-27T22:55:00 2012-06-27T23:00:00 2012-06-27T23:05:00 2012-06-27T23:10:00 2012-06-27T23:15:00 2012-06-27T23:20:00 2012-06-27T23:25:00 2012-06-27T23:30:00 

现在我想写一个脚本(或使用任何命令行工具),检查所有的值是否确实是有效的日期,以及相邻两个值之间的间隔是否都恰好是5分钟。 (我还需要对另一个采样间隔为1小时的数据集做同样的检查)

有没有可用于命令行的工具可以做到这一点? 我知道在R或Perl中编写一个小脚本很容易(实际上我已经开始编写这个脚本)。 但是也许在linux / solaris上已经预装了一个我不知道的实用程序。

我猜想没有任何工具可以解决你的任务,但是可以用Perl / Python oneliner(或者非常小的脚本)来完成。

 $ cat 1.pl
# Read one timestamp per line and verify that every valid timestamp is
# exactly $expected_delta seconds after the previous valid one.
# Prints the epoch value of each valid line, "not a date" for lines that
# cannot be parsed, and "delta is wrong: N" when the spacing is off.
use strict;
use warnings;
use Date::Parse;

my $expected_delta = 300;    # seconds between consecutive samples (5 minutes)
my $last;                    # epoch of the most recent valid timestamp, if any

while ( my $line = <> ) {
    next if $line =~ /^\s*$/;    # ignore blank lines
    chomp $line;

    my $time = str2time($line);    # undef when the line is not a parseable date
    if ( defined $time ) {
        print $time, "\n";
        if ( defined $last and $time - $last != $expected_delta ) {
            # Report current minus previous, so a gap that is too long
            # shows up as a value larger than $expected_delta.
            print "delta is wrong: " . ( $time - $last ) . "\n";
        }
        $last = $time;
    }
    else {
        # An invalid line does not reset $last: the next valid line is
        # still compared against the last valid timestamp seen.
        print "not a date", "\n";
    }
}
$ cat 1.txt
2012-06-27T22:55:00
2012-06-27T23:00:00
wrong
2012-06-27T23:05:00
2012-06-27T23:10:00
2012-06-27T23:15:01
2012-06-27T23:20:00
2012-06-27T23:25:00
2012-06-27T23:30:00
$ cat 1.txt | perl 1.pl
1340830500
1340830800
not a date
1340831100
1340831400
1340831701
delta is wrong: 301
1340832000
delta is wrong: 299
1340832300
1340832600


 # Check that every input line is a valid date exactly 300 seconds after the previous valid one.
(your grep etc commands) | while read -r date; do
    # GNU date parses the empty string as "today at midnight", so a blank
    # line would otherwise be accepted as a valid date; skip it instead.
    [ -z "$date" ] && continue

    # Convert to epoch seconds; stays empty when date(1) rejects the input.
    curDate=$(date +%s -d "$date" 2> /dev/null)
    if [ -n "$curDate" ]; then
        # Only compare once a previous valid timestamp has been seen.
        if [[ -n "$last" && $((curDate - last)) -ne 300 ]]; then
            echo "wrong delta: " $((curDate - last))
        fi
        last=$curDate
    else
        echo "not a date"
    fi
done


 # Print every pair of consecutive timestamps that are exactly five minutes apart.
from datetime import datetime, timedelta
import dateutil.parser

# fake console input
input = "2012-06-27T22:55:00 \
2012-06-27T23:00:00 \
2012-06-27T23:05:00 \
2012-06-27T23:10:00 \
2012-06-27T23:15:00 \
2012-06-27T23:20:00 \
2012-06-27T23:25:00 \
2012-06-27T23:30:00"


def parse(s):
    """Return the datetime parsed from ``s``, or None if ``s`` is not a valid date string."""
    try:
        return dateutil.parser.parse(s)
    except (ValueError, OverflowError):
        return None  # not a valid date string


# Parse the whitespace-separated tokens, dropping the ones that are not dates.
# A real list (not a lazy filter object) is needed because it is sliced below.
dates = [d for d in (parse(token) for token in input.split()) if d is not None]

# Pair each datetime with its direct successor so that every gap is examined,
# including the one between the 2nd and 3rd value, the 4th and 5th, and so on.
pairs = zip(dates, dates[1:])

# Compare whole timedeltas: the .seconds attribute alone ignores the day part,
# so a gap of one day plus five minutes would wrongly count as a match.
five_minutes = timedelta(minutes=5)

# print every datetime pair that has a delta of exactly five minutes
for earlier, later in pairs:
    if later - earlier == five_minutes:
        print(earlier)
        print(later)

这是我自己的重量级解决方案(仍在完善中)。 感谢所有提供帮助的人。

 #!/usr/bin/env perl
# This script checks if all input date values have a delta of
# say, 5 minutes to the previous date value.
#
# $Id: test_datetintervals.t 1804 2012-08-07 10:35:48Z knb $
#
# for f in $(ls -1 *std--ytd--2012-m01*xlsx); do echo ""; echo "$f"; ssgrep 2012- $f | test_datetintervals.t -num 60 -comm "$(basename $f)"; done
#
# ssgrep 2012 Monitoring_DailyAvg.min--2008-01-01--2012-11-01.txt.xlsx | grep -v "2012.15" | ./test_datetintervals.t -num 0 -interval day -pat \'%m/%d/%Y

use Modern::Perl;
use Getopt::Long;
use File::Basename;
use Carp qw (carp croak confess cluck);
use lib '/config';
use lib 'T:';
use lib '/lib/';    # wb3
use Pod::Usage;
use Test::More;
use DateTime;
use DateTime::Format::Strptime;

# Defaults; each can be overridden on the command line.
my $interval = "minute";    # 2012-06-27T23:05:00
my $n        = 5;
my $comment  = "";
my $help;
my $man;
my $pat = '%FT%T';          # 2012-10-09T17:00:00
#my $pat = "'%m/%d/%Y";     # 09/27/2012

# Remember the argument count before GetOptions consumes @ARGV.
my $argcnt = scalar(@ARGV);

GetOptions(
    'num=i'      => \$n,
    'interval:s' => \$interval,
    'comment:s'  => \$comment,
    'help|?'     => \$help,
    'man'        => \$man,
    'pattern:s'  => \$pat
) or pod2usage(2);

my $fn = basename($0);

# Built after option parsing so that it reports the effective settings.
my $msg2 = "This script must be part of a unix pipe. Script checks if all input date values have a delta of $n ${interval}s to the previous date value.";

pod2usage(
    -message    => "$msg2\nCall $fn --man to see full documentation",
    -exitstatus => 1
) if $help;

my $msg = <<'MSG';
# Sample calls:
#
# for f in $(ls -1 *std--ytd--2012.txt.xlsx); do echo ""; echo "$f"; ssgrep 2012- $f | test_datetintervals.t -num 60 -comm "$(basename $f)"; done
#
MSG

pod2usage( -message => "$msg2\n\n$msg", -exitstatus => 0 ) if $man;

pod2usage(
    -message =>
      "# \n!!!! \nScript was called with no arguments- but this cannot work.\n\n $msg2\n\n$msg",
    -exitstatus => 0
) if $argcnt == 0;

# Parser for the expected timestamp layout; my_carp is invoked on every
# line that does not match $pat, after which parse_datetime returns undef.
my $Strp = DateTime::Format::Strptime->new(
    pattern   => $pat,
    locale    => 'de_DE',
    time_zone => 'floating',
    on_error  => \&my_carp
);

note($msg2);

my $prev;          # last successfully parsed timestamp
my $okflag = 1;    # cleared as soon as any line fails a check
my $i      = 0;    # number of intervals compared

# $_ is deliberately used as the loop variable: my_carp reads it to report
# the offending input line.
LINE:
while (<>) {
    next LINE if /\A\s*\z/;    # tolerate blank lines in the piped input

    my $line = $_;
    chomp $line;

    my $dt = $Strp->parse_datetime($line);
    if ( !$dt ) {

        # Record the failure and move on; continuing with an undefined
        # $dt would die on the method calls below.
        fail("Line $.: cannot parse '$line' to datetime object");
        $okflag = 0;
        next LINE;
    }

    if ( !$prev ) {
        note "Line $.: previous value not found/not applicable!";
        note $dt->datetime();
    }
    else {
        # NOTE(review): delta_ms() carries only minutes and seconds, so
        # intervals other than minute/hour presumably always yield 0 here
        # (cf. the "-num 0 -interval day" sample call above) - confirm
        # before relying on day or week intervals.
        my $duration = $dt->delta_ms($prev);
        my $diff     = $duration->in_units("${interval}s");
        if ( $diff != $n ) {
            fail(   "$.: $n $interval interval between "
                  . $prev->datetime() . " and "
                  . $dt->datetime() );
            note "prev: " . $prev->datetime();
            note " : " . $dt->datetime();
            note "diff: $diff ${interval}s";
            note "";
            $okflag = 0;
        }
        $i++;
    }
    $prev = $dt;
}

if ($okflag) {
    ok( 1, "no gaps of $n ${interval}s detected! $comment" );
    note "";
}
note("done testing '$comment', about $i lines");
note "";
done_testing();

# Error callback for DateTime::Format::Strptime.
# Receives the parser object and the error message; warns with the current
# input line (taken from $_) and records the parser's message as a note.
sub my_carp {
    carp " line $.: Something wrong with " . $_;
    note $_[1];
    return 1;    # 0 for continue
}

=pod

=head1 NAME

test_datetintervals.t - check that piped-in date values are evenly spaced

=head1 SYNOPSIS

  ... | test_datetintervals.t [-?] [long options...]

=head1 OPTIONS

  -? --help     Prints this usage information.
  --man         Prints the description and sample calls.
  --num=i       Expected number of intervals between two values (default: 5)
  --interval    Interval unit (default: minute)
  --comment     Free text appended to the test output
  --pattern     strptime pattern of the input (default: '%FT%T',
                e.g. 2012-10-09T17:00:00)

=cut