Welcome, guest | Sign In | My Account | Store | Cart

# This script takes a data file and an extra attribute as inputs, and writes an
# output data file in a particular format. It also adds the extra attribute at
# the beginning of each line in the output file.
# Additionally, if the number of fields in a line of the input file does not
# match the number of pre-defined fields, such lines are filtered out
# and stored in a discard output file.
# Version: 0.1     13-Feb-2007  Rajkumar Jain          Initial Version


# The input file format is as follows.
#  01_attr_03|01_attr_01|01_attr_05|01_attr_02|01_attr_04|
#  02_attr_03|02_attr_01|02_attr_05|02_attr_02||
#  03_attr_03|03_attr_01|03_attr_05|03_attr_02|
#  04_attr_03|04_attr_01|04_attr_05|
#  05_attr_03|05_attr_01|05_attr_05|||05_attr_04|
#  06_attr_03|06_attr_01|06_attr_05|06_attr_02|06_attr_04|

# Capture the command-line arguments.
# NOTE(review): the left-hand side of this assignment is missing from the text
# as received. The check below reads $num_args, and later code reads $inp_file
# and $extra_attr, so the original presumably captured the argument count
# and/or the two arguments here -- TODO confirm against the original script.
= @ARGV;

# Exactly two command-line arguments are required; otherwise a message is
# printed and the script exits with status 1.
# NOTE(review): the braces around the body of this "if" are absent, so as
# written this is not valid Perl -- restore them before running.
if ($num_args != 2)
print "Incorrect number of arguments to perl \n";
exit 1;


# Array containing column names (headers) in the pre-defined output order.
# This can be changed as needed.
# NOTE(review): the list literal is cut off after its first element (no further
# entries and no closing parenthesis are present) -- the remaining column names
# must be recovered from the original script.
@col_names = (    "ATTRIBUTE01",


# NOTE(review): the target of this assignment is missing. $flag is tested
# against 0 and 1 further down, so this is presumably its initialisation --
# TODO confirm.
= 0;

# Defining the output data file and discard data file names
# NOTE(review): no assignments follow this comment in the text as received,
# yet $output_file and $discard_file are used further down -- presumably they
# were set here; TODO recover from the original script.

# Defining file handles for input and output files
# NOTE(review): each parenthesised list below is followed by "|| die", which
# suggests a leading "open" call was dropped from both lines -- confirm.
(INPUT, "<$inp_file") || die "Can't open file for reading";
(OUTPUT,">$output_file") || die "Can't open the output file for writing";

# Printing header names in the first line of the output file: the extra
# attribute first, then every entry of @col_names, each followed by a pipe.
print OUTPUT "$extra_attr"."|";

foreach $column_name (@col_names) {
print OUTPUT "$column_name"."|";
# NOTE(review): the closing brace of this foreach is not present here.

print OUTPUT "\n";

# Loop to iterate through each line of the input file
# NOTE(review): the loop header that the closing "End of While" brace at the
# bottom belongs to is not present here, and the target of this assignment is
# missing. $file_line_count is compared and incremented below, so it is
# presumably the variable being initialised -- TODO confirm.
= 0;

# This processing is for the first line (containing headers).
# We re-order the columns as per the order defined in array @col_names
# NOTE(review): the braces of this "if" (and of several constructs below) are
# missing, so the exact extent of each body has to be inferred.
if($file_line_count == 1)
# split called with only a pattern operates on $_ and splits on a literal pipe.
# Without a LIMIT argument, split discards trailing empty fields.
@line_items = split(/\|/);
# NOTE(review): assignment targets are missing on the next two lines;
# presumably $header_count (read by the loop below) and $file_line_count.
= @line_items;
=$file_line_count +1;

# Prepare a hash map between header columns in the input file
# and their index in the col array
# The outer loop bound is $header_count - 1, so the last split item is never
# compared against @col_names.
for ($i=0; $i<$header_count - 1;$i++)
for ($j=0; $j<=$#col_names ;$j++) {
if($line_items[$i] eq $col_names[$j] )  {
# NOTE(review): the hash name is missing before the subscript; %HashOrder is
# the hash read below, so presumably this stores the input index there.
{$col_names[$j]} = $i;
# Assign "" to all those columns which don't contain values
for ($j=0; $j<=$#col_names ;$j++) {
# NOTE(review): every plain Perl false value (undef, "", 0, "0") is numerically
# equal to 0, so "!= 0 && !(...)" looks like it can never be true. The later
# 'ne ""' test treats undef like "", which may be why this went unnoticed --
# verify the intended condition.
if ($HashOrder{$col_names[$j]} != 0 && !($HashOrder{$col_names[$j]}) ) {
{$col_names[$j]} = "";

# Get the current line
# Remove whitespace from the beginning and end of the current line.
# Each pattern matches a single whitespace character (there is no quantifier),
# so at most one character is removed from each end.
# NOTE(review): the variable being bound to each substitution is missing;
# presumably $cur_file_line, which is tested right after.
=~ s/^\s//;
=~ s/\s$//;

if ($cur_file_line ne "")
# This split again operates on $_, not on the trimmed $cur_file_line.
@line_items = split(/\|/);
# NOTE(review): target missing; presumably $elements_count, compared below.
= @line_items;
# If the number of values in the line matches the number of headers,
# then we store the line in the output file
if($elements_count ==  $header_count)  
# Rearrange according to the column positions
for ($j=0; $j<=$#col_names ;$j++) {

# Only columns that were mapped to an input index contribute a value;
# unmapped columns produce an empty field.
if($HashOrder{$col_names[$j]} ne "")
# NOTE(review): targets missing on the next two assignments; presumably
# $file_line, which is the value being extended on the right-hand side.
= $file_line.$line_items[$HashOrder{$col_names[$j]}];

= $file_line."|";

# $extra_attr is prefixed to every line and then the line
# is printed in the output file
= $extra_attr."|".$file_line."\n";
print OUTPUT "$file_line";
# If the number of values in the line does not match the number of headers,
# then we store the line in the discard output file.
# The message below (The following.... ) is printed only once in
# the discard output file. This is taken care of by the value of $flag.
# NOTE(review): the "else" that separates this branch from the one above, and
# the statement that changes $flag after the first discard, are not present.
if ($flag == 0)
(DISCARD,">$discard_file") || die "Can't open the discard file for writing";
print DISCARD "The following lines failed the field count validation \n";
# Printing the discarded line in the discard output file
print DISCARD "$cur_file_line"."\n";
}  #End of ($cur_file_line ne "")
}  #End of While

# Closing file handles
# NOTE(review): no close statements are present in the text as received.
# The condition below has no body; since DISCARD is only opened once a line
# has been discarded, it presumably guarded closing that handle -- TODO confirm.

if($flag == 1)

