#!/usr/bin/perl -w
# Created by Stewart Weiss 09/22/2006
#
# Usage:
#     extractstudents rawdatafile
#
# This is a script that extracts student fields (username, lastname,
# firstname, email address) from the cut-and-pasted list that Blackboard
# generates when the "List All" link is clicked in the
# Control Panel:List/Modify User page.
# Specifically, it sends to standard output a file consisting of lines of
# the form
#     username<TAB>"Firstname Lastname"<TAB>email_address
# in which
#     username is the part of the email address preceding the "@"
#
# The script filters out any users who are not of type "Student" such as
# the instructor for the course.
#
# Report errors to stewart.weiss@hunter.cuny.edu

use strict;
use warnings;

# parse_student_line($line)
#
# Parses one line of the pasted Blackboard user list.
#
# The line is expected to look like
#     Lastname, Firstname <numeric field> email_address user_type ...
# Names may be multi-word, such as "De La Rosa, Gina" and
# "Kim Lee, Xu Lin".  The long numeric field that Blackboard generates
# separates the name from the rest of the line.
#
# Returns the list ($username, $firstname, $lastname, $email) for a line
# describing a user of type "Student".  Returns the empty list for blank
# lines, lines without a numeric field, users of any other type, and lines
# whose name cannot be separated into last and first name.
sub parse_student_line {
    my ($line) = @_;

    # Skip undefined and blank lines (spaces, tabs, or nothing at all).
    return unless defined $line;
    return if $line =~ /^\s*$/;

    # Split on the first run of 10 or more digits only.  The limit of 2
    # keeps an email address that itself contains a long run of digits
    # from being cut into pieces.
    my ( $name_part, $rest ) = split /[0-9]{10,}/, $line, 2;
    return unless defined $rest;    # no numeric field on this line

    # split ' ' discards leading whitespace, so neither part has to begin
    # with a blank for the fields to be found in the right positions.
    my @name_words = split ' ', $name_part;
    my ( $email, $type ) = split ' ', $rest;

    # If the type is not "Student" (or is missing) skip the line.
    return unless defined $type and $type eq 'Student';
    return unless @name_words;

    my ( $lastname, $firstname );
    if ( @name_words <= 2 ) {

        # Both names are simple single-word names.
        ( $lastname, $firstname ) = @name_words;
        $firstname = '' unless defined $firstname;
    }
    else {

        # One of the names is a multi-word name like Yuk Shan.  The comma
        # indicates which it is: everything up to and including the first
        # word that carries a comma is the last name, the rest is the
        # first name.
        my ($comma_index) =
            grep { $name_words[$_] =~ /,/ } 0 .. $#name_words;
        return unless defined $comma_index;    # cannot tell the names apart

        $lastname  = join ' ', @name_words[ 0 .. $comma_index ];
        $firstname = join ' ',
            @name_words[ $comma_index + 1 .. $#name_words ];
    }
    $lastname =~ s/\s*,//;    # strip comma from lastname

    # The username is the part of the email address preceding the "@".
    ( my $username = $email ) =~ s/\@.*$//;

    return ( $username, $firstname, $lastname, $email );
}

# format_student_record($username, $firstname, $lastname, $email)
#
# Returns the newline-terminated output line
#     username<TAB>"Firstname Lastname"<TAB>email_address
# An empty first or last name is left out of the quoted full name.
sub format_student_record {
    my ( $username, $firstname, $lastname, $email ) = @_;
    my $fullname = join ' ', grep { length } $firstname, $lastname;
    return "$username\t\"$fullname\"\t$email\n";
}

# main()
#
# Reads the files named on the command line (or standard input when there
# are none) and prints one record per student to standard output.
sub main {
    while ( my $line = <> ) {
        chomp $line;
        my @record = parse_student_line($line);
        next unless @record;
        print format_student_record(@record);
    }
    return;
}

# Run only when executed as a script, so the subs above can be loaded
# without consuming input.
main() unless caller;